#!/usr/bin/env python3
# -*- coding: utf-8 -*-
###################################################
#Copyright (C), 2020-2026, KylinSoft. Co., Ltd.
#File name: oomcheck.py
#Author: yujingbo@kylinos.cn
#Version: 3.0
#Description: analyze dmesg log, diagnose oom reason
#Date:2026-02-03
#Others: None
#Function List:
#    - dinfo_diag:       analyze the overall dmesg log
#    - dinfo_analyze:    detailed analysis oom-killer log
#    - analyzereason:    diagnose oom reason
###################################################

import socket
import getopt
import threading
from subprocess import *
import os, fcntl, re, sys
import time,datetime
from time import sleep
import argparse
import json,base64,hashlib,re
import sched
import importlib
import json
import traceback

# Diagnosis labels. They are emitted verbatim in the report, so the text
# itself is runtime data and must not be altered.
DRES_CGROUP = '内存使用达到Cgroup上限'
DRES_PARENTCGROUP = '内存使用达到父Cgroup上限'
DRES_HOSTMEM = '系统内存不足'
DRES_CPUSETMEM = 'CPUSET 的mems值设置不合理'
DRES_MEMFRAG = '存在内存碎片化'
DRES_MEMPOLICY = 'NUMA内存策略限制'

# Marker substrings used to locate the start and the end of an OOM report
# inside the dmesg text.
DMESG_WORD = "invoked oom-killer"
DMESG_ENDWORD_KILLED = "Killed process"
DMESG_ENDWORD_REAP = "reaped process"
DMESG_ENDWORD_CONS = "oom-kill:constraint"
DWORD_CGROUP = "Task in /"
DWORD_NORMAL = "Normal: "

# All patterns are raw strings: the previous plain literals relied on
# invalid escape sequences (``\S``, ``\(``, ``\-``), which newer Python
# versions report as a SyntaxWarning.  The compiled patterns are unchanged.
# group(1) = cpuset name, group(2) = mems_allowed node list.
cm_pattern = re.compile(r"cpuset=([^, ]+)[ ,]+mems_allowed=([0-9\-\/\,]*)")
# group(1) = nodemask node list (or "null"), optional parentheses dropped.
nmask_pattern = re.compile(r"nodemask=\(?([0-9\-\/\,null]*)\)?")
# Each pattern captures alternating (name, value) groups from one
# "name:value name:value ..." line.
minfo_pattern = [
    re.compile(r"(free):(\S+) (free_pcp):(\S+) (free_cma):(\S+)"),
    re.compile(r"(slab_reclaimable):(\S+) (slab_unreclaimable):(\S+)"),
    re.compile(r"(unevictable):(\S+) (dirty):(\S+) (writeback):(\S+)"),
    re.compile(r"(mapped):(\S+) (shmem):(\S+) (pagetables):(\S+) bounce:\S+"),
    re.compile(r"(active_file):(\S+) (inactive_file):(\S+) (isolated_file):(\S+)"),
    re.compile(r"(active_anon):(\S+) (inactive_anon):(\S+) (isolated_anon):(\S+)"),
]

def listconvert(strings):
    """Expand a comma separated node list such as ``"0-2,4"`` into integers.

    Each item may be a single number, an inclusive ``a-b`` range, or be
    wrapped in parentheses.  An item containing ``null`` appends ``-1`` and
    stops the conversion.  Empty input yields an empty list.

    Raises:
        ValueError: if an item is neither a number, a range nor ``null``.
    """
    nodes = []
    if not strings:
        return nodes
    for token in strings.split(','):
        token = token.strip()
        if not token:
            continue
        if token[0] == '(' and token[-1] == ')':
            token = token[1:-1]
        if "null" in token:
            # "null" means no explicit node list; nothing after it is read.
            nodes.append(-1)
            break
        if "-" not in token:
            nodes.append(int(token))
            continue
        bounds = token.split('-')
        nodes.extend(range(int(bounds[0]), int(bounds[1]) + 1))
    return nodes

def unitconvert(memvalue):
    """Convert a size string with a unit suffix into a byte count.

    Args:
        memvalue: text such as ``"12kB"``, ``"3M"`` or ``"1g"``.

    Returns:
        The byte count as a decimal string when a known unit is found;
        ``0`` when the text carries no known unit; ``memvalue`` unchanged
        when it is not a string or its numeric part cannot be parsed.
    """
    gib = 1024 * 1024 * 1024
    mib = 1024 * 1024
    kib = 1024
    # Three independent families, checked in this order; within a family
    # the first matching suffix wins and a later family overrides an
    # earlier one.
    suffix_families = (
        (('G', gib), ('g', gib)),
        (('M', mib), ('m', mib)),
        (('kB', kib), ('KB', kib), ('k', kib), ('K', kib)),
    )
    res = 0
    try:
        for family in suffix_families:
            for suffix, factor in family:
                if suffix in memvalue:
                    res = str(int(memvalue.rstrip(suffix)) * factor)
                    break
        return res
    except (TypeError, ValueError, AttributeError):
        # Non-string input (e.g. the integer default 0) or an unparsable
        # number: hand the value back untouched.  Narrower than the former
        # bare ``except`` so KeyboardInterrupt/SystemExit are not swallowed.
        return memvalue


def extractprocid(diagres, pl, num):
    """Store the victim pid found in log line ``pl`` for OOM record ``num``.

    For "Killed process" / "reaped process" lines the pid is the first word
    after the marker; for any other line it is the text between ``pid=``
    and the next comma.  The pid is stored as a string in
    ``diagres['oommsgs'][num]['pid']``.
    """
    marker = None
    if DMESG_ENDWORD_KILLED in pl:
        marker = DMESG_ENDWORD_KILLED
    elif DMESG_ENDWORD_REAP in pl:
        marker = DMESG_ENDWORD_REAP

    if marker is None:
        pid = pl.split("pid=")[1].split(",")[0]
    else:
        pid = pl.strip().split(marker)[1].strip().split()[0]
    diagres['oommsgs'][num]['pid'] = pid

def memorycalculate(diagres, pl, num):
    """Sum anon/file/shmem RSS of the killed task found in line ``pl``.

    Each ``<kind>-rss:`` value is converted to bytes with ``unitconvert``;
    the total divided by 1024 is stored in
    ``diagres['oommsgs'][num]['tkilledmem']``.  A kind that is not present
    in the line counts as 0.
    """
    rss_values = []
    for label in ("anon-rss:", "file-rss:", "shmem-rss:"):
        value = 0
        if label in pl:
            value = pl.strip().split(label)[1].split()[0].strip(',')
        rss_values.append(value)
    total_bytes = sum(int(unitconvert(value)) for value in rss_values)
    diagres['oommsgs'][num]['tkilledmem'] = total_bytes / 1024

def extracthpage(diagres, pl, num):
    """Accumulate huge page memory described by line ``pl``.

    Adds ``hugepages_total * hugepages_size`` to
    ``diagres['oommsgs'][num]['meminfo']['hugepage']``.  The size is read
    from the end of the line with its last two characters (the unit)
    dropped.  Returns True when the line has no huge page fields and None
    otherwise.
    """
    if not ("hugepages_total" in pl and "hugepages_size" in pl):
        return True
    meminfo = diagres['oommsgs'][num]['meminfo']
    meminfo.setdefault('hugepage', 0)
    pages = int(pl.split('hugepages_total=')[1].strip().split()[0])
    page_size = int(pl.split('hugepages_size=')[1].strip()[:-2])
    meminfo['hugepage'] += pages * page_size

def extractmemory(diagres, pls, index, num):
    """Parse the Mem-Info counters of OOM record ``num``.

    Args:
        diagres: global diagnosis state; ``diagres['oommsgs'][num]['meminfo']``
            is reset and then filled.
        pls: the log lines of the OOM record.
        index: position in ``pls`` from which the counter block is searched.
        num: key of the OOM record.

    Returns:
        Always True.

    The counter block is the line holding both ``active_anon:`` and
    ``inactive_anon:`` plus the following five lines.  Every captured value
    is multiplied by 4 (pages to KB, assuming 4 KB pages as the rest of
    this file does).
    """
    meminfo = diagres['oommsgs'][num]['meminfo']
    for field in ('total_mem', 'free', 'free_pcp', 'slab', 'slabr', 'rmem',
                  'hugepage', 'active_file', 'inactive_file', 'active_anon',
                  'inactive_anon', 'unevictable', 'pagetables'):
        meminfo[field] = 0
    if len(pls) < 10:
        return True

    start = None
    for pos in range(index, len(pls)):
        if "active_anon:" in pls[pos] and "inactive_anon:" in pls[pos]:
            start = pos
            break
    # Also covers "not found" and an out-of-range index, which previously
    # depended on the loop variable still being bound.
    if start is None or start >= len(pls) - 5:
        return True

    # Try every pattern on every line of the block.  The old code paired
    # pattern k with line k, but the pattern list is not in the order the
    # kernel prints these lines, so most counters were never captured.
    for line in pls[start:start + len(minfo_pattern)]:
        for pattern in minfo_pattern:
            found = pattern.search(line)
            if not found:
                continue
            fields = found.groups()
            for name, pages in zip(fields[0::2], fields[1::2]):
                kbytes = int(pages) * 4
                if name == 'slab_unreclaimable':
                    meminfo['slab'] = kbytes
                elif name == 'slab_reclaimable':
                    meminfo['slabr'] = kbytes
                else:
                    meminfo[name] = kbytes
    return True

def ismemoryfrag(tinfo):
    """Tell whether an OOM looks like a fragmentation failure.

    True when the allocation order is 1..3 and free memory is above the low
    watermark.  ``host_free`` / ``host_low`` are normally strings such as
    ``"1234kB"``; they are compared numerically (the previous string
    comparison ranked ``"9kB"`` above ``"10000kB"``).  Unparsable
    watermarks yield False.
    """
    def _to_kb(value):
        # Accept both "1234kB" style strings and plain numbers.
        if isinstance(value, str):
            return int(value.strip().rstrip('kKbB'))
        return int(value)

    host_free = tinfo['host_free']
    host_low = tinfo['host_low']
    if not 1 <= tinfo['order'] <= 3:
        return False
    try:
        return _to_kb(host_free) > _to_kb(host_low)
    except (TypeError, ValueError):
        return False

def reasonmsghost(diagres, num):
    """Refine a host-level OOM diagnosis and return its detail text.

    Only acts when the record's reason is ``DRES_HOSTMEM``.  The former
    guard compared ``tinfo['json']`` (a dict) with that label, which is
    never equal, so the function always returned an empty string.

    Depending on the record, the reason/root may be changed to a cpuset
    misconfiguration, a NUMA policy restriction or memory fragmentation.

    Args:
        diagres: global diagnosis state (reads ``node_num``).
        num: key of the OOM record in ``diagres['oommsgs']``.

    Returns:
        Detail text, or '' when the record is not a host OOM or its
        watermark values are missing or unparsable.
    """
    tinfo = diagres['oommsgs'][num]
    if tinfo['reason'] != DRES_HOSTMEM:
        return ''
    host_free = tinfo.get('host_free')
    host_low = tinfo.get('host_low')
    if not isinstance(host_free, str) or not isinstance(host_low, str):
        # The zone line was incomplete; nothing to refine.
        return ''
    try:
        # Values look like "1234kB": drop the two unit characters.
        free_kb = int(host_free[:-2])
        low_kb = int(host_low[:-2])
    except ValueError:
        return ''

    tinfo['root'] = 'limit'
    node_num = diagres['node_num']
    mems_allowed = tinfo.get('mems_allowed')
    nodemask = tinfo.get('nodemask')
    node_watermark = "node free:%s," % (host_free) + "low:%s\n" % (host_low)

    # Free memory is near the low watermark and the cpuset does not span
    # all nodes (-1 marks "no explicit list").
    if (mems_allowed and mems_allowed[0] != -1
            and node_num != len(mems_allowed) and free_kb * 0.9 < low_kb):
        tinfo['reason'] = DRES_CPUSETMEM
        tinfo['root'] = 'cpuset'
        return ("node数量:%d\n" % (node_num)
                + "cpuset:%s," % (tinfo['cpuset'])
                + "cpuset配置:"
                + "".join("%s " % (node) for node in mems_allowed)
                + "\n"
                + node_watermark)

    # Plenty of free memory overall, but the nodemask covers only part of
    # the nodes.
    if (nodemask and nodemask[0] != -1
            and len(nodemask) != node_num and free_kb > low_kb * 2):
        tinfo['reason'] = DRES_MEMPOLICY
        tinfo['root'] = 'policy'
        return ("node数量:%d\n" % (node_num)
                + "nodemask配置:"
                + "".join("%s " % (node) for node in nodemask)
                + "\n"
                + node_watermark)

    reasonsum = ''
    if 'order' in tinfo and ismemoryfrag(tinfo):
        reasonsum += "order:%d\n" % (tinfo['order'])
        tinfo['reason'] = DRES_MEMFRAG
        tinfo['root'] = 'frag'
        tinfo['json']['order'] = tinfo['order']
    reasonsum += "host free:%s," % (host_free)
    reasonsum += "low:%s\n" % (host_low)

    tinfo['json']['host_free'] = host_free
    tinfo['json']['host_low'] = host_low
    return reasonsum

def reasonmsgcgroup(diagres, num):
    """Return the usage/limit text for a cgroup-level OOM record.

    Only acts when the record's reason is exactly ``DRES_CGROUP`` or
    ``DRES_PARENTCGROUP``.  Also copies usage, limit and the OOM count of
    the cgroup into the record's JSON section.

    Args:
        diagres: global diagnosis state (reads ``diagres['cgroup']``).
        num: key of the OOM record in ``diagres['oommsgs']``.

    Returns:
        The detail text, or '' for non-cgroup records.
    """
    tinfo = diagres['oommsgs'][num]
    if tinfo['reason'] not in (DRES_PARENTCGROUP, DRES_CGROUP):
        return ''

    # Records are initialised with '-unknown-'; the old comparison against
    # 'unknown' never matched, so every record was labelled "pod".  Both
    # spellings are treated as "not known".
    unknown = ('unknown', '-unknown-')
    if tinfo['podName'] not in unknown:
        cname = 'pod'
    elif tinfo['containerID'] not in unknown:
        cname = 'container'
    else:
        cname = "cgroup"

    reasonsum = "%s内存使用: %s," % (cname, tinfo['cg_usage'])
    reasonsum += " 限制大小: %s\n" % (tinfo['cg_limit'])
    reasonsum += "cgroup信息: %s\n" % (tinfo['cg_name'])
    tinfo['json']['cg_usage'] = tinfo['cg_usage']
    tinfo['json']['cg_limit'] = tinfo['cg_limit']
    # A cgroup that was never counted reports 0 instead of raising
    # KeyError and losing the whole report.
    tinfo['json']['cgroup_oom_num'] = diagres['cgroup'].get(tinfo['cg_name'], 0)
    return reasonsum

def reasonmsgcgroupshmem(diagres, num):
    """Detect heavy shared-memory use inside a cgroup OOM record.

    Only acts for cgroup / parent-cgroup reasons.  Promotes root 'limit' to
    'plimit' for parent-cgroup OOMs.  When
    ``cg_inanon + cg_anon - cg_rss`` exceeds 30% of the cgroup usage, the
    root becomes 'shmem', the amount is stored in the JSON section and a
    text fragment is returned; otherwise '' is returned.
    """
    tinfo = diagres['oommsgs'][num]
    if tinfo['reason'] not in (DRES_PARENTCGROUP, DRES_CGROUP):
        return ''
    if tinfo['root'] == 'limit' and tinfo['reason'] == DRES_PARENTCGROUP:
        tinfo['root'] = 'plimit'

    shared_kb = int(tinfo["cg_inanon"]) + int(tinfo["cg_anon"]) - int(tinfo["cg_rss"])
    # cg_usage looks like "1234kB": drop the two unit characters.
    if not shared_kb > int(tinfo['cg_usage'][:-2])*0.3:
        return ''
    tinfo['root'] = 'shmem'
    tinfo['json']['shmem'] = shared_kb
    return ", 共享内存使用 %dKB"%(shared_kb)


def procscorecheck(tinfo, diagres):
    """Check whether a larger, score-protected process escaped the kill.

    Returns ``(True, text)`` when the biggest process seen
    (``diagres['max']``) is not the killed one, has a negative
    ``oom_score_adj`` and is not overshadowed by a group of same-named
    processes; otherwise ``(False, "\\n")``.
    """
    top = diagres['max']
    top_group = diagres['max_total']
    if top['pid'] == 0:
        return False, "\n"
    if int(tinfo['pid'].strip()) == top['pid']:
        return False, '\n'
    if top['score'] >= 0:
        return False, "\n"
    # More than two same-named processes whose combined RSS clearly
    # exceeds the single biggest process.
    crowded = top_group['cnt'] > 2 and top_group['rss']*0.8 > top['rss']
    if top['task'] != top_group['task'] and crowded:
        return False, "\n"
    return True, '，process:%s(%s) 内存使用: %dKB,oom_score_adj:%s\n'%(
        top['task'], top['pid'], top['rss']*4, top['score'])

def extractpodinfo(diagres,num):
    """Placeholder for pod/container details; always returns ''."""
    podinfo = ''
    return podinfo

def extractmemuseinfo(diagres, num, key, pls):
    """Find a dominant non-anonymous memory consumer in cgroup statistics.

    Reads ``name value`` statistic lines starting at ``pls[key]`` until a
    line containing ``workingset`` or a line that is not a statistic.
    Values are divided by 1024.  If any statistic reaches 85% of
    ``cg_usage - inactive_anon - active_anon``, the first such entry is
    stored in ``diagres['oommsgs'][num]['cgroup_major_used']``.

    Args:
        diagres: global diagnosis state.
        num: key of the OOM record.
        key: index of the first statistic line in ``pls``.
        pls: the log lines of the OOM record.
    """
    # Accept both bare "name value" lines and lines with a "[...] " prefix.
    # Previously only the first line tolerated a prefix, so parsing of
    # timestamped logs stopped after one entry.  ``\d+`` (not ``\d*``)
    # avoids calling int() on an empty string.
    stat_line = re.compile(r"(?:^|\] )(\S+) (\d+)$")
    stats = {}
    for line in pls[key:]:
        if "workingset" in line:
            break
        found = stat_line.search(line)
        if not found:
            break
        stats[found.group(1)] = int(found.group(2)) / 1024

    # cg_usage looks like "1234kB": drop the two unit characters.
    cg_usage = int(diagres['oommsgs'][num]['cg_usage'][:-2])
    try:
        thresh = 0.85 * (cg_usage - stats['inactive_anon'] - stats['active_anon'])
    except KeyError:
        # The anon counters were not in the dump; nothing can be concluded.
        return
    for name, value in stats.items():
        if value >= thresh:
            diagres['oommsgs'][num]['cgroup_major_used'] = {'name': name, 'value': value}
            break

def initoutmsg(diagres, num):
    """Reset the JSON section of OOM record ``num`` to its default values."""
    diagres['oommsgs'][num]['json'] = {
        'pid': '-unknown-',
        'task': '-unknown-',
        'task_mem': 0,
        'shmem': 0,
        'total_rss': 0,
        'podName': '-unknown-',
        'containerID': '-unknown-',
        'cg_usage': 0,
        'cg_limit': 0,
        'cgroup_oom_num': 0,
        'root': '-unknown-',
        'type': '-unknown-',
    }

def generateoutmsg(diagres,num, msgsum):
    """Build the textual report and the JSON section for OOM record ``num``.

    Args:
        diagres: global diagnosis state.
        num: key of the OOM record in ``diagres['oommsgs']``.
        msgsum: text to which the detail lines are appended.

    Returns:
        The diagnosis line followed by the accumulated detail text.  The
        same two parts are stored in the record's JSON section under
        ``result`` and ``msg``.
    """
    tinfo = diagres['oommsgs'][num]
    # Start from a JSON section with default values, then overwrite fields.
    initoutmsg(diagres, num)
    tinfo['json']['rss_list_desc'] = tinfo['rss_list_desc']
    tinfo['json']['total_oom'] = diagres['msgsnum']
    tinfo['json']['cg_name'] = tinfo['cg_name']
    tinfo['json']['host_free'] = tinfo.get('host_free',0)
    tinfo['json']['host_low'] = tinfo.get('host_low',0)
    tinfo['json']['cgroup_major_used'] = tinfo.get('cgroup_major_used', {})
    reason = ''
    # If the kill line carried no RSS figures, fall back to the value
    # recorded for this pid in the per-process table.
    if tinfo['tkilledmem'] == 0 and tinfo['pid'] in tinfo['state_mem']:
        tinfo['tkilledmem'] = tinfo['state_mem'][tinfo['pid']]
        tinfo['tkilledmem'] = tinfo['tkilledmem']
    # task_name is stored with one surrounding character on each side
    # (e.g. "(name)"); [1:-1] removes them.
    tinfo['json']['task'] = tinfo['task_name'][1:-1]
    tinfo['json']['pid'] = tinfo['pid']
    tinfo['json']['task_mem'] = tinfo['tkilledmem']
    tinfo['json']['total_rss'] = tinfo['state_mem']['total_rss']
    msgsum += "进程信息: %s(%s), 内存大小: %sKB\n"%(tinfo['task_name'][1:-1], tinfo['pid'], tinfo['tkilledmem'])
    # Default root cause; the reasonmsg* helpers below may overwrite it.
    tinfo['root'] = 'limit'
    if tinfo['cg_name'] in diagres['cgroup']:
        msgsum += extractpodinfo(diagres, num)
    msgsum += reasonmsgcgroup(diagres, num)
    msgsum += reasonmsghost(diagres, num)
    # Read the reason only after the helpers above ran, since they may
    # have replaced it.
    reason = "诊断结果: %s "%(tinfo['reason'])
    reason += reasonmsgcgroupshmem(diagres, num)
    if 'cmdline' in diagres:
        reason += diagres['cmdline']
    ret, sss = procscorecheck(tinfo, diagres)
    if ret == False:
        # No protected large process found: check whether many same-named
        # processes together use clearly more than the killed task.
        rtscinfo = diagres['max_total']
        rtscmsg = '\n'
        if (rtscinfo['rss']*4 > tinfo['tkilledmem']*1.5) and (rtscinfo['cnt'] > 2):
            tinfo['root'] = 'fork'
            rtscmsg = ', %d process:%s total memory usage: %dKB\n'%(rtscinfo['cnt'],rtscinfo['task'],rtscinfo['rss']*4)
            tinfo['json']['fork_max_task'] = rtscinfo['task']
            tinfo['json']['fork_max_cnt'] = rtscinfo['cnt']
            tinfo['json']['fork_max_usage'] = rtscinfo['rss'] * 4
        reason += rtscmsg
    else:
        reason += sss
    # For cgroup OOMs, append the captured per-process table lines.
    if tinfo['type']  == 'cgroup':
        if 'msg' in tinfo['state_mem']:
            msgsum += "进程相关信息:\n"
            for line in tinfo['state_mem']['msg']:
                msgsum += line +'\n'
    # 'kernelUsed' is only present after caltotalmemused() filled meminfo.
    if 'kernelUsed' in tinfo['meminfo']:
        msgsum += "mem info: total:%sKB, user used:%sKB, kernel used:%s KB\nuser file used:%sKB, user anon used:%s KB, kernel resevred:%sKB, kernel page used:%sKB kernel uslab:%sKB" %(
                tinfo['meminfo']['total_mem'],tinfo['meminfo']['userUsed'], tinfo['meminfo']['kernelUsed'],tinfo['meminfo']['active_file']+tinfo['meminfo']['inactive_file'], tinfo['meminfo']['active_anon']+tinfo['meminfo']['inactive_anon'],tinfo['meminfo']['rmem'],tinfo['meminfo']['allocPage'], tinfo['meminfo']['slab'])
    tinfo['json']['root'] = tinfo['root']
    tinfo['json']['type'] = tinfo['type']
    tinfo['json']['result'] = reason
    tinfo['json']['msg'] = msgsum
    return reason + msgsum

def extracttaskstate(num, diagres):
    """Parse the per-process table of OOM record ``num``.

    Fills ``tinfo['state_mem']`` (captured lines, total RSS, RSS per pid),
    ``tinfo['rss_all']`` (RSS and count per task name) and
    ``tinfo['rss_list_desc']`` (top 10 task names by RSS).  Also updates
    the running maxima ``diagres['max']`` (largest single process) and
    ``diagres['max_total']`` (largest same-named group).  RSS columns are
    multiplied by 4 where the file reports KB (4 KB pages assumed, as
    elsewhere in this file).

    Args:
        num: key of the OOM record in ``diagres['oommsgs']``.
        diagres: global diagnosis state.

    Returns:
        ``diagres['max']``.
    """
    tinfo = diagres['oommsgs'][num]
    in_table = False
    rall = {}
    tinfo['state_mem']['msg'] = []
    tinfo['state_mem']['total_rss'] = 0
    bracket_cnt = 2
    # Without any header line, treat the record as table rows from the start.
    if 'oom_score_adj' not in '\n'.join(tinfo['oom_msg']):
        in_table = True
    for pl in tinfo['oom_msg']:
        try:
            if 'rss' in pl and 'oom_score_adj' in pl and 'name' in pl:
                # Table header: remember how many '[' a row carries.
                in_table = True
                tinfo['state_mem']['msg'].append(pl)
                bracket_cnt = pl.count('[')
                continue
            if not in_table:
                continue
            if 'Out of memory' in pl:
                break
            if DMESG_ENDWORD_KILLED in pl or DMESG_ENDWORD_REAP in pl or DMESG_ENDWORD_CONS in pl:
                break
            if pl.count('[') != bracket_cnt:
                break
            pidx = pl.rfind('[')
            lidx = pl.rfind(']')
            if pidx == -1 or lidx == -1:
                continue
            fields = pl[lidx+1:].strip().split()
            # fields[3] is read below, so at least four columns are needed
            # (the old "< 3" check let three-column rows through).
            if len(fields) < 4:
                continue
            # Parse everything before touching shared state, so a malformed
            # row cannot leave a half-initialised entry in ``rall`` that
            # later breaks the sort.
            task = fields[-1]
            rss_pages = int(fields[3])
            pid = int(pl[pidx+1:lidx].strip())

            group = rall.setdefault(task, {'rss': 0, 'cnt': 0})
            group['rss'] += rss_pages
            group['cnt'] += 1
            tinfo['state_mem']['msg'].append(pl)
            tinfo['state_mem']['total_rss'] += rss_pages * 4
            tinfo['state_mem'][str(pid)] = rss_pages * 4
            if rss_pages > diagres['max']['rss']:
                score = int(fields[-2])
                diagres['max']['task'] = task
                diagres['max']['rss'] = rss_pages
                diagres['max']['pid'] = pid
                diagres['max']['score'] = score
            if group['rss'] > diagres['max_total']['rss']:
                score = int(fields[-2])
                diagres['max_total']['rss'] = int(group['rss'])
                diagres['max_total']['task'] = task
                diagres['max_total']['score'] = score
                diagres['max_total']['cnt'] = int(group['cnt'])
        except Exception as err:
            # format_exc() returns the traceback text; print_exc() returns
            # None, which used to end up in the message.
            print("extracttaskstate error {} lines {}\n".format(err, traceback.format_exc()))
            continue
    tinfo['rss_all'] = rall
    ranked = sorted(rall.items(), key=lambda item: item[1]['rss'], reverse=True)[0:10]
    tinfo['rss_list_desc'] = [{'task': name, 'rss': info['rss']} for name, info in ranked]
    return diagres['max']

def caltotalmemused(diagres,num):
    """Derive user/kernel memory totals for OOM record ``num``.

    Adds ``allocPage``, ``kernelUsed``, ``userUsed`` and ``kernelOther`` to
    the record's ``meminfo``.  Does nothing when ``meminfo`` is empty.
    """
    meminfo = diagres['oommsgs'][num]['meminfo']
    if not meminfo:
        return
    user_kb = meminfo["active_anon"] + meminfo["inactive_anon"]
    user_kb = user_kb + meminfo["active_file"] + meminfo["inactive_file"]
    user_kb = user_kb + meminfo.get("hugepage", 0)
    # NOTE(review): only "slab" is subtracted here while slabr, pagetables
    # and unevictable are added; confirm the signs are intended.
    alloc_kb = (meminfo["total_mem"] - meminfo["free"] - user_kb - meminfo["slab"]
                + meminfo["slabr"] + meminfo["pagetables"] + meminfo['unevictable'])
    if alloc_kb < 1:
        # Fall back to a fixed value when the estimate is not positive.
        alloc_kb = 1024
    kernel_other = meminfo["slab"] + meminfo["slabr"] + meminfo["pagetables"] + meminfo['unevictable']
    meminfo["allocPage"] = alloc_kb
    meminfo["kernelUsed"] = alloc_kb + meminfo["rmem"] + kernel_other
    meminfo["userUsed"] = user_kb
    meminfo["kernelOther"] = kernel_other

def extractunslabmem(diagres, line, num, key, lines):
    """Record the ten largest entries of the unreclaimable slab table.

    Rows start two lines after ``lines[key]`` and are read while a row has
    at least three fields and its last two fields end in ``KB``.  The
    result, a list of ``(name, {'active': ..., 'total': ...})`` sorted by
    total in descending order, is stored in
    ``diagres['oommsgs'][num]['meminfo']['topuslab']``.  ``line`` is not
    used.
    """
    slabs = {}
    for row in lines[key + 2:]:
        fields = row.split()
        if len(fields) < 3 or not fields[-1].endswith('KB') or not fields[-2].endswith('KB'):
            break
        slabs[fields[-3]] = {
            'active': int(fields[-2][:-2]),
            'total': int(fields[-1][:-2]),
        }
    ranked = sorted(slabs.items(), key=lambda entry: entry[1]['total'], reverse=True)
    diagres['oommsgs'][num]['meminfo']['topuslab'] = ranked[0:10]

def analyzereason(num, diagres, summary):
    """Classify OOM record ``num`` and produce its report.

    Walks every line of the record, filling the record's fields (order,
    reason, type, cgroup data, watermarks, meminfo, ...), then computes
    memory totals and builds the report with ``generateoutmsg``.

    Args:
        num: key of the OOM record in ``diagres['oommsgs']``.
        diagres: global diagnosis state.
        summary: text handed to ``generateoutmsg`` as the initial detail text.

    Returns:
        The report text, or "" when an exception escaped the analysis.
        The report is printed unless ``diagres['json'] == 1``.
    """
    try:
        # Counts lines reporting hugepages_size=1048576; stored below as
        # diagres['node_num'].
        nnum = 0
        for key in range(len(diagres['oommsgs'][num]['oom_msg'])):
            try:
                pl = diagres['oommsgs'][num]['oom_msg'][key]
                # First chain: report header line and constraint line.
                if "invoked oom-killer" in pl:
                    # The word after "order=" with its last character removed.
                    order = int(pl.strip().split("order=")[1].split()[0][:-1])
                    diagres['oommsgs'][num]['order'] = order
                elif DMESG_ENDWORD_CONS in pl:
                    if "CONSTRAINT_MEMCG" in pl:
                        diagres['oommsgs'][num]['reason'] = DRES_CGROUP
                        diagres['oommsgs'][num]['type'] = 'cgroup'
                        cgroup = pl.split("task_memcg=")[1].split(",")[0]
                        pcgroup = pl.split("oom_memcg=")[1].split(",")[0]
                        # Differing task and OOM cgroups are reported as a
                        # parent-cgroup limit.
                        if pcgroup != cgroup:
                            diagres['oommsgs'][num]['reason'] = DRES_PARENTCGROUP
                        diagres['oommsgs'][num]['cg_name'] = cgroup
                    memorycalculate(diagres, pl, num)
                    extractprocid(diagres, pl, num)
                # Independent check: a nodemask may appear on either line above.
                if 'nodemask' in pl:
                    if nmask_pattern.search(pl):
                        diagres['oommsgs'][num]['nodemask'] = listconvert(nmask_pattern.search(pl).group(1))
                # Second chain: one branch per remaining kind of line.
                if "mems_allowed=" in pl:
                    if cm_pattern.search(pl):
                        diagres['oommsgs'][num]['mems_allowed'] = listconvert(cm_pattern.search(pl).group(2))
                        diagres['oommsgs'][num]['cpuset'] = cm_pattern.search(pl).group(1)
                elif DMESG_ENDWORD_KILLED in pl or DMESG_ENDWORD_REAP in pl:
                    memorycalculate(diagres, pl, num)
                    extractprocid(diagres, pl, num)
                elif "Task in" in pl:
                    # First and last word after "Task in" are compared to
                    # tell cgroup from parent-cgroup limits.
                    cgroup = pl.strip().split("Task in")[1].strip().split()[0]
                    pcgroup = pl.strip().split("Task in")[1].strip().split()[-1]
                    if "limit of host" in pl:
                        diagres['oommsgs'][num]['type'] = 'host'
                    else:
                        if cgroup != pcgroup:
                            diagres['oommsgs'][num]['reason'] = DRES_PARENTCGROUP
                        else:
                            diagres['oommsgs'][num]['reason'] = DRES_CGROUP
                        diagres['oommsgs'][num]['type'] = 'cgroup'
                    diagres['oommsgs'][num]['cg_name'] = cgroup
                elif "memory: usage" in pl:
                    memory_limit = pl.strip().split('limit')[1].split()[0].strip(',')
                    diagres['oommsgs'][num]['cg_limit'] = memory_limit
                    memory_usage = pl.strip().split('memory: usage')[1].split()[0].strip(',')
                    diagres['oommsgs'][num]['cg_usage'] = memory_usage
                elif "Memory cgroup stats for" in pl:
                    if "inactive_anon:" not in pl:
                        # No inline counters on this line: default to 0.
                        diagres['oommsgs'][num]['cg_anon'] = 0;
                        diagres['oommsgs'][num]['cg_rss'] = 0;
                        diagres['oommsgs'][num]['cg_inanon'] = 0;
                    else:
                        # [:-2] drops the last two characters of each value.
                        diagres['oommsgs'][num]['cg_inanon'] = pl.strip().split("inactive_anon:")[1].split()[0][:-2]
                        diagres['oommsgs'][num]['cg_rss'] = pl.strip().split(" rss:")[1].split()[0][:-2]
                        diagres['oommsgs'][num]['cg_anon'] = pl.strip().split(" active_anon:")[1].split()[0][:-2]
                elif "Mem-Info:" in pl:
                    extractmemory(diagres, diagres['oommsgs'][num]['oom_msg'], key,num)
                elif "Normal free:" in pl:
                    diagres['oommsgs'][num]['type'] = 'host'
                    diagres['oommsgs'][num]['reason'] = DRES_HOSTMEM
                    diagres['oommsgs'][num]['host_low'] = pl.strip().split('low:')[1].split()[0]
                    diagres['oommsgs'][num]['host_free'] = pl.strip().split('Normal free:')[1].split()[0]
                elif "pages reserved" in pl:
                    # Third word from the end, multiplied by 4, is recorded
                    # as rmem and subtracted from total_mem.
                    diagres['oommsgs'][num]['meminfo']['rmem'] = int(pl.strip().split()[-3])*4
                    diagres['oommsgs'][num]['meminfo']['total_mem'] -= int(pl.strip().split()[-3])*4
                elif "pages RAM" in pl:
                    diagres['oommsgs'][num]['meminfo']['total_mem'] = int(pl.strip().split()[-3])*4
                elif "hugepages_total" in pl:
                    if "hugepages_size=1048576" in pl:
                        nnum += 1
                    extracthpage(diagres, pl, num)
                elif "Unreclaimable slab info" in pl:
                    extractunslabmem(diagres, pl, num, key, diagres['oommsgs'][num]['oom_msg'])
                elif "] anon " in pl:
                    extractmemuseinfo(diagres, num, key, diagres['oommsgs'][num]['oom_msg'])
            except Exception as err: 
                # A malformed line is reported and skipped; the rest of the
                # record is still analysed.
                print("analyzereason loop err {}\n".format(err))
                continue
        caltotalmemused(diagres,num)
        diagres['node_num'] = nnum
        # Append the dominant consumer found by extractmemuseinfo, if any.
        if 'cgroup_major_used' in diagres['oommsgs'][num]:
            diagres['oommsgs'][num]['reason'] = '%s, %s used over 85%% memory (%dKB)' %(diagres['oommsgs'][num]['reason'],diagres['oommsgs'][num]['cgroup_major_used']['name'],diagres['oommsgs'][num]['cgroup_major_used']['value'])
        summary = generateoutmsg(diagres, num, summary)
        diagres['oommsgs'][num]['summary'] = summary
        if diagres['json'] == 1:
            pass
        else:
            print(summary)
        return summary
    except Exception as err:
        print("analyzereason err {}\n".format(err))
        return ""

def inittotalinfo(tinfo):
    """Populate an OOM record dict with its default fields."""
    tinfo.update({
        'pid': "0",
        'reason': '',
        'oom_msg': [],
        'time': 0,
        'cg_name': '-unknown-',
        'podName': '-unknown-',
        'containerID': '-unknown-',
        'cg_usage': 0,
        'summary': '',
        'cg_limit': 0,
        'task_name': '-unknown-',
        'tkilledmem': 0,
        'state_mem': {},
        'meminfo': {},
        'root': '-unknown-',
        'type': '-unknown-',
    })

def dinfo_analyze(dmesgs, diagres):
    """Split the dmesg text into OOM records.

    Every "invoked oom-killer" line opens a record and the next kill /
    reap / constraint line closes it; the covered lines are stored in
    ``diagres['oommsgs'][n]['oom_msg']``.  Per-task and per-cgroup OOM
    counters are maintained in ``diagres['task']`` and
    ``diagres['cgroup']``.  Lines whose last '[' is not at column 0 are
    skipped when looking for markers.

    Exits the process (status 0) after printing the usage text when no
    record was found.  Any other exception is printed and swallowed.

    Args:
        dmesgs: the full dmesg text.
        diagres: global diagnosis state, updated in place.
    """
    try:
        oombegin = False
        endline = 0
        sline = -1
        dmesg = dmesgs.splitlines()
        tname = "-unknown-"
        if "-unknown-" not in diagres['task']:
            diagres['task']['-unknown-'] = 0
        # Use the loop position instead of ``dmesg.index(pl)``: the lookup
        # failed for lines with surrounding whitespace (``pl`` is stripped,
        # the list is not), picked the first of several identical lines and
        # rescanned the list for every marker.
        for lineno, raw in enumerate(dmesg):
            pl = raw.strip()
            if pl.rfind('[') > 0:
                continue
            if "Command line" in pl:
                if "elfcorehdr" in pl and "crashkernel" not in pl:
                    elf = pl.split("elfcorehdr=")[1].split()[0]
                    diagres['cmdline'] = ". 未预留内存给第二内核, elfcorehdr:%s" % elf
            if DMESG_WORD in pl:
                if oombegin:
                    # The previous record never saw an end marker: close it here.
                    diagres['oommsgs'][diagres['msgsnum']]['oom_msg'] = dmesg[sline:lineno]
                oombegin = True
                sline = lineno
                diagres['msgsnum'] += 1
                diagres['oommsgs'][diagres['msgsnum']] = {}
                inittotalinfo(diagres['oommsgs'][diagres['msgsnum']])
            if DMESG_ENDWORD_KILLED in pl or DMESG_ENDWORD_REAP in pl or DMESG_ENDWORD_CONS in pl:
                if sline >= 0:
                    diagres['oommsgs'][diagres['msgsnum']]['oom_msg'] = dmesg[sline:lineno + 1]
                elif lineno - endline < 10:
                    # Another end marker shortly after the previous one
                    # belongs to the same event: do not open a new record.
                    oombegin = False
                    sline = -1
                    endline = lineno
                    continue
                else:
                    # End marker without a visible start: build a record
                    # from the lines since the previous end marker.
                    diagres['msgsnum'] += 1
                    diagres['oommsgs'][diagres['msgsnum']] = {}
                    inittotalinfo(diagres['oommsgs'][diagres['msgsnum']])
                    diagres['oommsgs'][diagres['msgsnum']]['oom_msg'] = dmesg[endline + 1:lineno + 1]
                oombegin = False
                sline = -1
                endline = lineno
                if DMESG_ENDWORD_REAP in pl:
                    oomkillrline = DMESG_ENDWORD_REAP
                elif DMESG_ENDWORD_CONS in pl:
                    oomkillrline = DMESG_ENDWORD_CONS
                elif DMESG_ENDWORD_KILLED in pl:
                    oomkillrline = DMESG_ENDWORD_KILLED
                record = diagres['oommsgs'][diagres['msgsnum']]
                if DMESG_ENDWORD_CONS in pl:
                    tname = "(" + pl.split(oomkillrline)[1].split("task=")[1].split(",")[0] + ")"
                    memcg = pl.split('task_memcg=')[1].split(",")[0]
                    record['cgroup_name'] = memcg
                    diagres['cgroup'][memcg] = diagres['cgroup'].get(memcg, 0) + 1
                else:
                    tname = pl.split(oomkillrline)[1].split()[1].strip(',')
                record['task_name'] = tname
                diagres['task'][tname] = diagres['task'].get(tname, 0) + 1
            if DWORD_CGROUP in pl:
                memcg = pl.split('Task in')[1].split()[0]
                # A "Task in" line before any record used to raise KeyError
                # and abort the whole scan; only the counter is updated then.
                if diagres['msgsnum'] in diagres['oommsgs']:
                    diagres['oommsgs'][diagres['msgsnum']]['cgroup_name'] = memcg
                diagres['cgroup'][memcg] = diagres['cgroup'].get(memcg, 0) + 1
        if diagres["msgsnum"] == 0:
            print("请指定dmesg日志进行诊断！")
            pusage()
            sys.exit(0)
        if oombegin:
            # The log ends inside a record: keep everything up to the end.
            diagres['oommsgs'][diagres['msgsnum']]['oom_msg'] = dmesg[sline:]
    except Exception as err:
        traceback.print_exc()
        print("dinfo_analyze failed {}\n".format(err))

def dinfo_diag(dinfo):
    """Run the full diagnosis on ``dinfo['dmesg']``.

    Builds the global diagnosis state, splits the log into OOM records and
    analyses the last ``dinfo['num']`` of them.  The state is stored in
    ``dinfo['diag_result']`` even when the diagnosis fails.

    Args:
        dinfo: dict with ``dmesg`` (log text), ``num`` (how many of the
            latest records to analyse) and ``dtime`` (diagnosis time).
    """
    diagres = {}
    try:
        diagres['task'] = {}
        diagres['summary'] = ""
        diagres['json'] = 0
        diagres['msgsnum'] = 0
        diagres['cgroup'] = {}
        diagres['oommsgs'] = {}
        diagres['last_time'] = {}
        diagres['tdiag'] = dinfo['dtime']
        diagres['time'] = []
        diagres['max_total'] = {'rss':0,'task':"",'score':0,'cnt':0}
        diagres['max'] = {'rss':0,'task':"",'score':0,'pid':0}
        dmesgs = dinfo['dmesg']
        if DMESG_WORD in dmesgs:
            dinfo_analyze(dmesgs, diagres)
            # Analyse only the last dinfo['num'] records.
            last = diagres["msgsnum"] - dinfo['num'] + 1
            if last <= 0:
                last = 1
            output_json = {}
            # Pre-set so the assignment after the loop is defined even when
            # no record is analysed (previously a NameError).
            submsg = diagres['summary']
            for i in range(last, diagres["msgsnum"] + 1):
                extracttaskstate(i, diagres)
                submsg = analyzereason(i, diagres, diagres['summary'])
                output_json[str(diagres['oommsgs'][i]['time'])] = diagres['oommsgs'][i]['json']
            if diagres['json'] == 1:
                print(json.dumps(output_json, ensure_ascii=False))
            diagres['summary'] = submsg
        if diagres["msgsnum"] == 0:
            print("请指定包含OOM事件的dmesg日志进行诊断！")
        dinfo['diag_result'] = diagres

    except Exception as err:
        traceback.print_exc()
        print( "dmesg diagnose failed {}".format(err))
        dinfo['diag_result'] = diagres

def main():
    """Command line entry point: parse options, read the log, diagnose.

    Options: ``-h/--help`` prints the usage text; ``-f/--file`` names the
    dmesg log to analyse.  Exits with -1 on bad options, a missing log
    argument or an unreadable file.
    """
    dinfo = {'num': 1}
    try:
        opts, _args = getopt.getopt(sys.argv[1:],"hf:",["help","file="])
    except getopt.GetoptError:
        pusage()
        sys.exit(-1)
    if not opts:
        pusage()
        sys.exit(-1)
    for name,value in opts:
        if name in ("-h","--help"):
            pusage()
            sys.exit(0)
        elif name in ("-f","--file"):
            dinfo['filename'] = value
    if 'filename' not in dinfo:
        pusage()
        sys.exit(-1)
    dinfo['dtime'] = int(time.time())
    try:
        # Logs may contain bytes that are not valid UTF-8; replace them
        # instead of aborting with UnicodeDecodeError.
        with open(dinfo['filename'], 'r', encoding='utf-8', errors='replace') as f:
            dinfo['dmesg'] = f.read().strip()
    except OSError as err:
        print("无法读取日志文件 {}: {}".format(dinfo['filename'], err))
        sys.exit(-1)
    dinfo_diag(dinfo)

def pusage():
    """Print the command line usage text."""
    usage_text = """
    usage:
       -h
       -f dmesg.log
       for example:
       ./oomcheck.py -f dmesg.log
    """
    print(usage_text)

# Run the command line entry point only when executed as a script.
if __name__ == "__main__":
    main()
