#!/bin/sh
#*************************************************
#Copyright (c) KylinSoft Co., Ltd. [2025].All rights reserved. 
#File name: kylin-ha-log-analyzer
#Author: XuXiaojuan
#Version: 2
#Description: Log analysis script
#Date: 2022.9.26
#Others: 
#Function List: 
#History: 
#1. Date: 2022.9.26 Author: XuXiaojuan Modification:add the kylin-ha-log-analyzer script
#*************************************************

# Run everything in the C locale; the script formats dates with %b and
# sorts log lines with "sort -M" further down.
LC_ALL=C
export LC_ALL

# Identification strings shown by --version and in the usage examples.
version="kylin ha log analyzer 2.0"
scriptname=$(basename "$0")

# Help text printed by --help and on command-line errors.
usage="Usage: $0 [OPTION]... 
Required option:
  -f     Start time,format \"YYYY-MM-DD HH:MM:SS\"
  -o     The dir of output file
Options:

  -f     Start time,format \"YYYY-MM-DD HH:MM:SS\"
  -t     End time, format YYYY-MM-DD HH:MM:SS.If this parameter is defined, the -f parameter must be defined
  --level   The log level, Value can be:event,error,fence, default is error 
  -o        The dir of output file
  -s        Collect the system info
  -c        Collect the cluster info
  -p 	    Search for log information of matching patterns
  -a        Analyze the current cluster status
      --help     Display this help and exit
      --version  Output version information and exit

Examples:
   $scriptname -f \"2025-03-15 13:05:00\" -t \"2025-03-15 18:00:00\" -o /home/cluster/loganalyzer
   $scriptname -f \"2025-03-15 13:05:00\" -a -c -p test -o /home/cluster/loganalyzer
   $scriptname -f \"2025-03-15 13:05:00\" --level fence -o /home/cluster/loganalyzer
   $scriptname -f \"2025-03-15 13:05:00\" -o /home/cluster/loganalyzer
      "

# Switches toggled by -s, -c and -a (0 = off, 1 = on).
systeminfo=0 clusterinfo=0 ancluster=0

# File names created inside the output directory by -s and -c.
sysinfo_txt="sysinfo" cluster_txt="clusterinfo"

# Values filled in by the option parser; all start out empty.
options= output= matchp=
fromday= today=
fromtime= totime=
timef= timet=
time_period=
#######################################
# Parse the command line into the script's global settings.
# Globals read:    usage, version
# Globals written: fromday, time_period1, timef, fromtime   (-f)
#                  today, time_period2, timet, totime       (-t)
#                  options (--level), output (-o), matchp (-p)
#                  ancluster (-a), systeminfo (-s), clusterinfo (-c)
# Arguments:       the script's own arguments ("$@")
# Exits:           0 after --help / --version;
#                  1 on an unknown option, an option given without its
#                  value, a time "date -d" cannot parse, or an output
#                  directory that does not exist.
#######################################
parse_cmdline() {
	while [ $# -gt 0 ]
	do
		# Options that take a value must be followed by one; stop here
		# instead of carrying on with a half-parsed command line.
		case "$1" in
			-f|-t|--level|-o|-p)
				if [ $# -lt 2 ];then
					printf '%s\n' "$usage"
					exit 1
				fi
				;;
		esac

		case "$1" in
			--help)
				printf '%s\n' "$usage" || exit 1
				exit 0
				;;
			--version)
				printf '%s\n' "$version" || exit 1
				exit 0
				;;
			-f)
				# Validate first so a bad value is reported only once.
				if ! timef=$(date -d "$2" +%s);then
					echo "The specified value of the -f parameter does not meet the requirements"
					exit 1
				fi
				# Derive every representation from the parsed date so
				# that they always agree with each other.
				fromday=$(date -d "$2" +%Y%m%d)
				time_period1=$(date -d "$2" +"%b %d %H:%M:%S")
				fromtime=$(date -d "$2" +"%Y%m%d%H%M%S")
				shift 2
				;;
			-t)
				if ! timet=$(date -d "$2" +%s);then
					echo "The specified value of the -t parameter does not meet the requirements"
					exit 1
				fi
				today=$(date -d "$2" +%Y%m%d)
				time_period2=$(date -d "$2" +"%b %d %H:%M:%S")
				totime=$(date -d "$2" +"%Y%m%d%H%M%S")
				shift 2
				;;
			--level)
				options="$2"
				shift 2
				;;
			-o)
				if [ ! -d "$2" ];then
					echo "The Dir is not exist"
					exit 1
				fi
				output="$2"
				shift 2
				;;
			-p)
				matchp="$2"
				shift 2
				;;
			-a)
				ancluster=1
				shift
				;;
			-s)
				systeminfo=1
				shift
				;;
			-c)
				clusterinfo=1
				shift
				;;
			*)
				# Unknown option or stray operand: this is an error.
				printf '%s\n' "$usage"
				exit 1
				;;
		esac
	done
}

parse_cmdline "$@"
# Parsing consumes every argument; drop them from the script's own
# positional parameters as well so later code sees an empty "$@".
set --

# Pull in pacemaker's shared report helper library.  Several names used
# later in this script are not defined in it (is_running, distro, debug,
# findln_by_time, $host); presumably they come from this file --
# NOTE(review): confirm against the installed report.common.
. /usr/share/pacemaker/report.common

# Pacemaker/CRM directory locations.  Nothing in the visible part of this
# script reads them; presumably they are consumed by report.common
# helpers -- TODO confirm.
PE_STATE_DIR="/var/lib/pacemaker/pengine"
CRM_CONFIG_DIR="/var/lib/pacemaker/cib"
CRM_STATE_DIR="/var/run/crm"

#######################################
# Print a summary of the platform and the installed cluster software.
# Globals:   cluster (written)
# Arguments: optional package names, passed on unchanged to pkg_ver
# Outputs:   platform/kernel/architecture lines, the cibadmin, corosync
#            and stonith version output, the resource-agents build
#            version line and the pkg_ver report, all on stdout
# Returns:   the status of pkg_ver
#######################################
sysinfo() {
    cluster="corosync"
    echo "Platform: $(uname)"
    echo "Kernel release: $(uname -r)"
    echo "Architecture: $(uname -m)"
    if [ "$(uname)" = Linux ]; then
        # distro is not defined in this file (expected from report.common).
        echo "Distribution: $(distro)"
    fi

    echo
    cibadmin --version 2>&1 | head -n 1
    cibadmin -! 2>&1

    /usr/sbin/corosync -v 2>&1 | head -n 1

    # Cluster glue version hash (if available)
    stonith -V 2>/dev/null

    # Resource agents version hash
    echo "resource-agents: $(grep 'Build version:' /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs)"

    echo
    # "$@" keeps each package name intact; an unquoted $* would re-split
    # and glob-expand the arguments.
    pkg_ver "$@"
}
#######################################
# List every installed RPM and verify the packages named as arguments.
# Globals:   pkg_mgr, pkg (written)
# Arguments: zero or more package names
# Outputs:   the sorted "rpm -qa" listing, then for each named package
#            that is installed a "Verifying installation of:" line
#            followed by the "rpm --verify" output
#######################################
pkg_ver() {
    pkg_mgr="rpm"
    echo "Installed packages:"
    rpm -qa --qf '%{name} %{version}-%{release} - %{distribution} %{arch}\n' | sort
    echo
    # Iterate over "$@" so that every argument is treated as exactly one
    # package name.
    for pkg in "$@"; do
        if rpm -q "$pkg" >/dev/null 2>&1; then
            # debug is not defined in this file (expected from report.common).
            debug "Verifying installation of: $pkg"
            echo "Verifying installation of: $pkg"
            rpm --verify "$pkg" 2>&1
        fi
    done
}
#sysinfo
# Whitespace-separated list of configuration paths handed to getconfig.
# The list is expanded unquoted by its users, so the entries must not carry
# quote characters of their own: word splitting does not remove them.
config_files="/etc/corosync/corosync.conf /etc/drbd.conf /etc/drbd.d /etc/booth"
#######################################
# Dump the live cluster state to stdout, one banner line per section.
# Globals:   output, host (read); config_dir (written)
# Arguments: accepted but not used
# Outputs:   crm_mon / CIB / membership sections when pacemaker-controld
#            is running (otherwise a STOPPED marker), followed by the
#            corosync blackbox, cmap and quorum sections when corosync is
#            running
#######################################
getconfig(){
    config_dir="$output/config"

    if ! is_running pacemaker-controld; then
        printf '%s\n' "*****STOPPED*****"
        echo "$host"
    else
        printf '%s\n' "*****CRM_MON*****"
        # The "Last updated" timestamp line is filtered out of the status.
        crm_mon -1 2>&1 | grep -v '^Last upd'
        printf '%s\n' "*****CIB NOW*****"
        cibadmin -Ql 2>/dev/null
        printf '%s\n' "*****MEMBER*****"
        crm_node -p
        printf '%s\n' "*****RUNNING*****"
        echo "$host"
    fi

    # Nothing more to report without corosync.
    if ! is_running corosync; then
        return 0
    fi
    printf '%s\n' "*****corosync-blackbox******"
    corosync-blackbox 2>/dev/null
    printf '%s\n' "*****corosync-cmapctl*****"
    corosync-cmapctl 2>/dev/null
    printf '%s\n' "*****corosync-quorumtool*****"
    corosync-quorumtool -s -i
}
#getconfig $config_files
# Remove every scratch file this script creates under /tmp: the per-level
# result files (.ha_*) and the intermediate log copies.
exitdo(){
	for scratch in \
		/tmp/.ha_* \
		/tmp/.pacemakerlog \
		/tmp/.tmppacemakerlog \
		/tmp/.tmptimelog \
		/tmp/.haerrorlog
	do
		rm -rf "$scratch"
	done
}

# Disabled draft of a relative "[+/-]day" time-period option, kept for
# reference only.  The here-document is fed to the no-op ":" and its
# delimiter is quoted so the shell treats the body as inert text.  With an
# unquoted delimiter the backquoted date commands inside would be executed
# on every run of the script, one of them with an invalid "- day" argument.
: <<'EOF'
nowdate=`date +"%b %d %H:%M:%S"`
if [ "x$time_period" != "x" ];then
     echo $time_period
     case $time_period in
	     [1-9]|[1-9][0-9]|[1-9][0-9][0-9])
		     echo "shuzi"
		     expdate=`date +"%b %d %H:%M:%S" --date="-$time_period day"`
		     ;;
	     -[1-9]|-[1-9][0-9]|-[1-9][0-9][0-9])
		     echo "jian"
		     ;;
	     +[1-9]|+[1-9][0-9]|+[1-9][0-9][0-9])
		     echo "jia"
		     ;;
	     *)
		     echo "The time must be [+/-]day"
		     exit 1
		     ;;
	esac
else
	expdate=`date +"%b %d %H:%M:%S" --date="-1 day"`
fi
EOF

#######################################
# Collect the system information when -s was given.
# Globals: systeminfo, output, sysinfo_txt (read)
# Outputs: the sysinfo report, written to "$output/$sysinfo_txt", or to
#          stdout when no output directory is set
#######################################
get_systeminfo(){
	if [ "${systeminfo:-0}" -eq 1 ];then
		if [ -z "$output" ];then
			sysinfo
		else
			# Quoted so an output directory containing blanks works.
			sysinfo > "$output/$sysinfo_txt"
		fi
	fi
}

#######################################
# Collect the cluster information when -c was given.
# Globals: clusterinfo, output, cluster_txt, config_files (read)
# Outputs: the getconfig report, written to "$output/$cluster_txt", or to
#          stdout when no output directory is set
#######################################
get_clusterinfo(){
	if [ "${clusterinfo:-0}" -eq 1 ];then
		# $config_files is a whitespace-separated list and is split into
		# separate arguments on purpose.
		if [ -z "$output" ];then
			# shellcheck disable=SC2086
			getconfig $config_files
		else
			# Quoted so an output directory containing blanks works.
			# shellcheck disable=SC2086
			getconfig $config_files > "$output/$cluster_txt"
		fi
	fi
}
#######################################
# Assemble the pacemaker log for the requested time window.
# Rotated logs (pacemaker.log-YYYYMMDD.gz) whose date is not older than
# the start day are concatenated, oldest first, followed by the current
# pacemaker.log; the lines between the start and end time are then cut out.
# Globals read:    time_period1, time_period2, fromtime, totime,
#                  fromday, today, timef, timet
# Globals written: logs, log, logday, fromline, toline
# Files:           /tmp/.tmppacemakerlog (combined log),
#                  /tmp/.pacemakerlog (selected window)
# Returns:         0 when /tmp/.pacemakerlog was produced, 1 otherwise
# Exits:           1 when the start time is later than the end time
#######################################
get_tmplog(){
	# An end time is only meaningful together with a start time.
	if [ -n "$time_period2" ] && [ -z "$time_period1" ];then
		echo "You have defined the -t parameter, so the parameter -f must be specified!"
		return 1
	fi
	if [ -n "$totime" ] && [ "$fromtime" -gt "$totime" ];then
		echo "The from time can not be greate than to time"
		exit 1
	fi

	# Start from a clean slate: everything below appends, so a file left
	# behind by an interrupted run would otherwise leak into the result.
	rm -f /tmp/.tmppacemakerlog

	if [ -n "$fromday" ];then
		logs=$(find /var/log/pacemaker/ -name "pacemaker.log-*.gz" | sort)
		# The paths found here contain no blanks, so plain word splitting
		# of the list is sufficient.
		for log in $logs
		do
			# pacemaker.log-20250315.gz -> 20250315
			logday=${log##*/}
			logday=${logday#pacemaker.log-}
			logday=${logday%%[!0-9]*}
			[ -n "$logday" ] || continue

			if [ "$fromday" -le "$logday" ];then
				gunzip -c "$log" >> /tmp/.tmppacemakerlog
				# The first rotated log dated on or after the end day
				# is the last one that is needed.
				if [ -n "$today" ] && [ "$today" -le "$logday" ];then
					break
				fi
			fi
		done
	fi
	cat /var/log/pacemaker/pacemaker.log >> /tmp/.tmppacemakerlog

	# findln_by_time is not defined in this file (expected from
	# report.common); it is used here to turn a time into a line number.
	fromline=$(findln_by_time "/tmp/.tmppacemakerlog" "$timef")
	toline=$(findln_by_time "/tmp/.tmppacemakerlog" "$timet")

	# Treat a missing line number like an empty window instead of feeding
	# an empty operand to test and sed.
	if [ -z "$fromline" ] || [ -z "$toline" ] || [ "$fromline" -eq "$toline" ];then
		echo "No log find"
		exitdo
		return 1
	fi
	sed -n "${fromline},${toline} p" /tmp/.tmppacemakerlog > /tmp/.pacemakerlog
	return 0
}
#awk -F " " '$$time_period1 <= $2" "$3 && $2" "$3 <= "04 17:20:00"' /var/log/pacemaker/user.log
#sed -n "/$fromt1/,/$fromt2/p" /var/log/pacemaker/user.log
#######################################
# Filter /tmp/.pacemakerlog by the requested --level.
# Globals: options (read; empty means "error")
# Files:   error -> /tmp/.ha_error  lines containing "error" or "fail"
#                   (case-insensitive), without those from
#                   pacemaker-based/-attrd/-controld and the "Transport
#                   endpoint is not connected" noise, month-sorted and
#                   de-duplicated
#          event -> /tmp/.ha_event  lines from the listed event functions
#          fence -> /tmp/.ha_fence  lines mentioning pacemaker-fenced
# Exits:   1 (after cleaning up) for any other level
#######################################
get_level_info(){
	case "$options" in
		""|error)
			# One pass selects both keywords; "sort -M | uniq" then
			# removes the duplicates.  ("sort -u" is deliberately not
			# used with -M.)
			grep -i -E 'error|fail' /tmp/.pacemakerlog \
				| grep -v -E 'pacemaker-based|pacemaker-attrd|pacemaker-controld|Transport endpoint is not connected' \
				| sort -M | uniq > /tmp/.ha_error
			;;
		event)
			grep -E 'log_executor_event|log_list_item|log_execute|determine_online_status|pcmk__native_allocate|rsc_action_default|cluster_status|remap_operation|unpack_rsc_op_failure|pe_get_failcount|RecurringOp' /tmp/.pacemakerlog > /tmp/.ha_event
			;;
		fence)
			grep "pacemaker-fenced" /tmp/.pacemakerlog > /tmp/.ha_fence
			;;
		*)
			echo "The specified value of the --level parameter does not meet the requirements !"
			exitdo
			exit 1
			;;
	esac
}

#######################################
# Print the lines of /tmp/.pacemakerlog that match a pattern,
# ignoring case.
# Arguments: $1 - grep pattern
#######################################
get_pattern_info(){
	# -e keeps a pattern that starts with "-" from being parsed as
	# grep options.
	grep -i -e "$1" /tmp/.pacemakerlog
}
#######################################
# Write the filtered log (all /tmp/.ha_* files) to the result file.
# Globals read:    fromtime, totime, options, output
# Globals written: name, nowtime
# Outputs:         "$output/$name", or "./$name" when no output directory
#                  is set; the name is
#                  ha_log_filter_<level>_<from>-<to>.log, with the current
#                  time standing in for a missing end time
#######################################
write_new_log(){
	if [ -n "$fromtime" ];then
		# Braces are required: "$options_" would expand a variable
		# called "options_" and silently drop the level from the name.
		# An unset level means the default level "error".
		if [ -n "$totime" ];then
			name="ha_log_filter_${options:-error}_${fromtime}-${totime}.log"
		else
			nowtime=$(date +"%Y%m%d%H%M%S")
			name="ha_log_filter_${options:-error}_${fromtime}-${nowtime}.log"
		fi
	else
		name="ha_log_filter.log"
	fi

	# Quoted so an output directory containing blanks works.
	if [ -z "$output" ];then
		cat /tmp/.ha_* > "./$name"
	else
		cat /tmp/.ha_* > "$output/$name"
	fi
}

#######################################
# Print a short analysis of the live cluster and of the filtered log.
# Globals read:    host, options, time_period1, time_period2
# Globals written: options (set to "error" when empty), logline, opinfo,
#                  opinfo1, strtmp, strtmp1, tmpline
# Files read:      /tmp/.ha_* (output of get_level_info)
# Outputs:         the report on stdout
# Returns:         0
#######################################
analyze_cluster(){
	echo "	The cluster information is:"
	if is_running pacemaker-controld; then
		crmadmin -S "$host"
		crmadmin -P
		crmadmin -D
		crmadmin -N
		echo "	Configure check:"
		if crm_verify -L -V;then
			echo "The configure is ok!"
		fi
	else
		echo "The pacemaker is not running!"
	fi
	if is_running corosync;then
		echo "	The heartbeat information is:"
		corosync-cfgtool -s
	else
		echo "The corosync is not running!"
	fi

	logline=$(cat /tmp/.ha_* | wc -l)
	if [ -z "$options" ];then
		options="error"
	fi
	echo "There are $logline lines of $options related log information here."
	if [ "$logline" -eq 0 ];then
		echo "There is nothing to find."
		return 0
	fi
	echo "From the log information, from $time_period1 to $time_period2 we find: "

	if [ "$options" = "error" ];then
		# Field 6 of a log line is matched against the patterns below;
		# each distinct value is reported once.
		opinfo=$(awk '{print $6}' /tmp/.ha_* | sort | uniq)
		for strtmp in $opinfo
		do
			case $strtmp in
				*unpack_resources*)
					echo "There is resource unpack for define."
					;;
				*unpack_rsc_op_failure*)
					echo "There is resource operation failure."
					;;
				*pcmk__log_transition_summary*)
					grep "pcmk__log_transition_summary" /tmp/.ha_* | grep "notice:" | cut -d' ' -f7- | sort | uniq
					;;
				*pcmk__primitive_assign*)
					;;
				*pe_get_failcount*)
					;;
				\[[0-9]*\])
					# Field 6 is a bracketed value here, so the
					# function-name check moves on to field 7.
					opinfo1=$(awk '{print $7}' /tmp/.ha_* | sort | uniq)
					for strtmp1 in $opinfo1
					do
						case $strtmp1 in
							*log_op_output*)
								echo "There is errro output execute here:"
								grep "log_op_output" /tmp/.ha_* | cut -d' ' -f8- | sort | uniq
								;;
							*)
								;;
						esac
					done
					;;
				*pcmk__threshold_reached*)
					grep "pcmk__threshold_reached" /tmp/.ha_* | grep "warning:" | cut -d' ' -f7- | sort | uniq
					;;
				*pcmk__primitive_create_actions*)
					# Report the errors of this very function (the arm
					# used to grep for pcmk__threshold_reached, which
					# never shows these lines).
					grep "pcmk__primitive_create_actions" /tmp/.ha_* | grep "error:" | cut -d' ' -f7- | sort | uniq
					;;
				*)
					echo "There is unknow error $strtmp here"
					;;
			esac
		done
	elif [ "$options" = "event" ];then
		tmpline=$(grep "log_executor_event" /tmp/.ha_* | wc -l)
		echo "There are $tmpline related logs about resource execution."
		tmpline=$(grep "log_execute" /tmp/.ha_* | wc -l)
		echo "There are $tmpline excuting operation logs about resource start stop or restart."
		tmpline=$(grep "log_list_item" /tmp/.ha_* | wc -l)
		echo "There are $tmpline notice operation logs about resource start stop or restart."
		tmpline=$(grep "determine_online_status" /tmp/.ha_* | wc -l)
		echo "There are $tmpline logs regarding online information of nodes."
		tmpline=$(grep "rsc_action_default" /tmp/.ha_* | wc -l)
		echo "There are $tmpline logs regarding the current status information of resources."
		tmpline=$(grep "RecurringOp" /tmp/.ha_* | wc -l)
		echo "There are $tmpline logs about RecurringOp operations!!"
	fi
	echo "Please check the log for more information!"
}
# ---- main flow -------------------------------------------------------
# -f and -o are mandatory; the end time defaults to "now".
if [ -z "$timef" ];then
	echo "The from time must be specified!"
	printf '%s\n' "$usage"
	exit 1
fi
if [ -z "$output" ];then
	echo "The output diretory must be specified!"
	printf '%s\n' "$usage"
	exit 1
fi
if [ -z "$timet" ];then
	timet=$(date +%s)
fi

# From here on scratch files are created under /tmp; remove them on every
# way out, including early error exits and interruption by a signal.
trap exitdo EXIT
trap 'exit 1' HUP INT TERM

get_systeminfo
get_clusterinfo
if ! get_tmplog;then
	echo "There are no log information to analyze!"
	exit 1
fi

# Optional pattern search over the selected time window.
if [ -n "$matchp" ];then
	get_pattern_info "$matchp" > "$output/pattern_match"
fi
get_level_info
if [ "${ancluster:-0}" -eq 1 ];then
	analyze_cluster > "$output/analyze"
fi
write_new_log

exit 0
