#!/bin/bash

# Remove stale temporary files left behind by earlier runs.
rm -rf /tmp/io_sample.log
rm -rf /tmp/iomonitor_*.log

# Minimum value of the sampled iotop field (field 11 of a process row) that a
# process must reach before it is logged.
IO_THRESHOLD=50
# Upper bound on the number of process rows logged per run.
SAVE_TOPN=3
# Name used in the "start" log message.
CMD_NAME="ex_iomonitor_daemon"

# Signal handling: report the signal to /dev/kmsg, remove this tool's temporary
# files and exit with status 0.
# SIGKILL is deliberately not listed: it can never be caught, and asking the
# shell to trap it has undefined results.
trap 'echo "$(date) [$0] trap exceptional signal! exit." > /dev/kmsg; rm -rf /tmp/iomonitor_*.log >/dev/null 2>&1; exit 0;' SIGHUP SIGINT SIGTERM SIGQUIT

# Append the standard system directories to PATH so the external tools used
# below can be found even when the caller's PATH is minimal.
PATH="${PATH}:/usr/sbin:/usr/bin:/bin:/sbin"

# Send one message to syslog through logger(1).
#   $1 - message text
# The entry is tagged "exmonitor" at priority "info". The message is prefixed
# with a local timestamp and whatever `pidof exmonitor` prints (empty when no
# such process is found).
iotop_logger() {
    local stamp monitor_pid

    stamp="$(date +"%Y-%m-%d:%H:%M:%S")"
    monitor_pid="$(pidof exmonitor)"

    logger -it "exmonitor" \
        "[LOC ${stamp}]exmonitor[${monitor_pid}]:" \
        -p info \
        "$1"
}

# Log the "inflight" attribute of every block device.
#
# For each entry under /sys/block whose inflight file is readable, one message
# of the form "<device> inflight info:<file contents>" is passed to
# iotop_logger. Takes no arguments.
function get_inflight_log()
{
    local disk_path disk_name

    # Walk the directory with a glob rather than parsing `ls` output: no
    # external command and no word splitting of the names. When nothing
    # matches, the pattern stays literal and the readability test rejects it.
    for disk_path in /sys/block/*
    do
        disk_name="${disk_path##*/}"
        if [ -r "$disk_path/inflight" ]; then
            iotop_logger "$disk_name inflight info:$(cat "$disk_path/inflight" 2>/dev/null)"
        fi
    done
}

# Print the integer part of the 11th whitespace-separated field of one line.
#   $1 - file holding captured iotop output
#   $2 - 1-based line number to inspect
# Prints the value and returns 0 only when the field is a non-negative number.
# Returns 1 and prints nothing when the line does not exist or the field is
# not numeric (for example when the columns are shifted and field 11 is text).
# NOTE(review): field 11 is presumably the IO percentage of `iotop -b -t`
# output - confirm against the installed iotop version.
function _iotop_io_value()
{
    local value

    value=$(awk -v n="$2" 'NR == n { print $11; exit }' "$1")
    # Drop the fractional part, e.g. "75.50" -> "75".
    value="${value%%.*}"

    case "$value" in
        ''|*[!0-9]*) return 1 ;;
    esac
    printf '%s\n' "$value"
}

# Log the busiest processes found in the last sample of a captured iotop run.
#   $1 - file holding captured iotop output
# Only the block that starts at the last "Total DISK READ" line is examined.
# Nothing is logged unless the first process row reaches IO_THRESHOLD; in that
# case the summary/header lines are logged, followed by up to SAVE_TOPN
# consecutive process rows that reach the threshold. get_inflight_log is called
# after every logged process row.
function _iotop_report_sample()
{
    local sample_file="$1"
    local summary_line first_line line_no io_value
    local header_lines=2
    local logged=1

    # Line number of the last "Total DISK READ" summary line.
    summary_line=$(awk '/Total DISK READ/ { n = NR } END { if (n) print n }' "$sample_file")
    if [ -z "$summary_line" ]; then
        iotop_logger "iotop run fail."
        return 0
    fi

    # Output containing an "Actual DISK READ" line has one more line between
    # the summary and the first process row.
    if grep -q 'Actual DISK READ' "$sample_file"; then
        header_lines=3
    fi
    first_line=$((summary_line + header_lines))

    # No process row, or a value that is not a number: nothing to report.
    io_value=$(_iotop_io_value "$sample_file" "$first_line") || return 0
    if [ "$io_value" -lt "$IO_THRESHOLD" ]; then
        return 0
    fi

    # Summary and column-header lines.
    for ((line_no = summary_line; line_no < first_line; line_no++)); do
        iotop_logger "$(sed -n "${line_no}p" "$sample_file")"
    done

    # The first process row has already been checked against the threshold.
    iotop_logger "$(sed -n "${first_line}p" "$sample_file")"
    get_inflight_log

    # Following rows, until one is missing, invalid or below the threshold.
    line_no=$((first_line + 1))
    while [ "$logged" -lt "$SAVE_TOPN" ]; do
        io_value=$(_iotop_io_value "$sample_file" "$line_no") || break
        if [ "$io_value" -lt "$IO_THRESHOLD" ]; then
            break
        fi

        iotop_logger "$(sed -n "${line_no}p" "$sample_file")"
        get_inflight_log
        line_no=$((line_no + 1))
        logged=$((logged + 1))
    done

    return 0
}

# Run iotop for three one-second batch iterations and log the processes whose
# sampled value reaches IO_THRESHOLD.
#
# Logs "$CMD_NAME start", captures the iotop output in a private temporary file
# under /tmp, reports it and removes the file again. Takes no arguments.
# Exits the whole script with status 1 when the temporary file cannot be
# created.
function get_iotop_log()
{
    local tmp_file save_umask

    iotop_logger "$CMD_NAME start"

    # Create the capture file with a restrictive umask, then restore the
    # caller's umask on both the success and the failure path.
    save_umask=$(umask)
    umask 077
    if ! tmp_file=$(mktemp "/tmp/iomonitor_XXXXXXXXXX.log" 2>/dev/null) || [ -z "$tmp_file" ]; then
        iotop_logger "create tmp file failed."
        umask "${save_umask}"
        exit 1
    fi
    umask "${save_umask}"

    # stderr is captured as well, so a failing iotop simply yields a file
    # without a "Total DISK READ" line.
    iotop -n 3 -b -t -d 1 -o > "$tmp_file" 2>&1

    _iotop_report_sample "$tmp_file"

    # Single cleanup point for every outcome of the report step.
    rm -f -- "$tmp_file" >/dev/null 2>&1
}

# Main execution: run one sampling pass, then exit with status 0 regardless of
# what get_iotop_log returned (get_iotop_log itself calls `exit 1` when it
# cannot create its temporary file, in which case this line is never reached).
get_iotop_log
exit 0