#!/usr/bin/env perl
# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
#------------------------------------------------------------------------------

=head1    monerrorlog

=head2    When first run (by the sensor) this script adds an entry to the AIX ODM
          or Linux syslog.conf file so that it will be notified when an error is
          logged (through a message queue on AIX and a named pipe on Linux).
          Then it checks for any logged errors. On all subsequent runs this
          script just checks for errors.

=cut

#------------------------------------------------------------------------------

BEGIN
{
    $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : '/opt/xcat';
}
use lib "$::XCATROOT/lib/perl/xCAT_monitoring/rmc";
use strict;
use locale;

use Getopt::Std;
use IPC::SysV qw(IPC_STAT S_IRWXU IPC_PRIVATE IPC_CREAT S_IRUSR S_IWUSR );
use IPC::Msg;
use NodeUtils;

# SENSOR_MonitorStatus is compared against '2' to detect "monitoring is being
# stopped"; treat an unset variable as "not stopping".
my $monitor_status =
  defined($ENV{'SENSOR_MonitorStatus'}) ? $ENV{'SENSOR_MonitorStatus'} : '';
my $stopping = ($monitor_status eq '2');

#do nothing on Linux when stopping.
if ($stopping && ($^O =~ /^linux/i))
{
    exit 0;
}

#normal
my $dirname = "xcat_rmc_err_mon";
my $vardir  = "/var/opt/$dirname";
my $runfile = "$vardir/.monerrorlog_run";
my $fifo    = "$vardir/syslog_fifo";
my ($syslogconf, $embedinfo);
if (-e "/etc/SuSE-release")
{    #SLES 10
    $syslogconf = "/etc/syslog-ng/syslog-ng.conf";
    $embedinfo =
      "destination warn_fifo { pipe(\\\"$fifo\\\" group(root) perm(0644)); };\nlog { source(src); filter(f_warn); destination(warn_fifo); };";
}
else
{    #others
    $syslogconf = "/etc/syslog.conf";
    $embedinfo  = "*.warn |$fifo";
}
my $odmstanza = "$vardir/odmstanza";

# Best effort: later steps (mkfifo, open of the stanza file) fail on their own
# if the directory could not be created.
if (!-d $vardir)
{
    mkdir($vardir);
}

#--------------------------------------------------------------------------------

=head3    isRMrunning
        Check whether a subsystem is reported as 'active' by lssrc.
    Arguments:
        $resMan - subsystem name passed to "lssrc -s", e.g. 'IBM.SensorRM'
    Returns:
        1  the status column of the lssrc output is 'active'
        0  otherwise (including when lssrc itself fails)
=cut

#--------------------------------------------------------------------------------
sub isRMrunning
{
    my $resMan = $_[0];
    my @output = NodeUtils->runcmd("LANG=C /usr/bin/lssrc -s $resMan", -1);
    if ($::RUNCMD_RC)
    {
        return 0;
    }    # maybe we should try to catch real errors here

    # The first output line is the header; the second describes the subsystem.
    if (!defined($output[1]))
    {
        return 0;
    }
    my ($subsys, $group, $pid, $status) = split(' ', $output[1]);
    if (defined($status) && $status eq 'active')
    {
        return 1;
    }
    return 0;
}

#check to see if this is the first time this script has been run
if (!-e $runfile)
{    #first time
    if ($^O =~ /^linux/i)
    {
        NodeUtils->runcmd("grep $dirname $syslogconf", -1);
        if ($::RUNCMD_RC == 1)
        {    #grep did not find dirname
                #update syslog.conf
            if (!-p $fifo)
            {

                # Skip mkfifo when the pipe is already there so a leftover
                # FIFO from an earlier run does not make the command fail.
                NodeUtils->runcmd("/usr/bin/mkfifo $fifo");
            }
            NodeUtils->runcmd("echo \"$embedinfo\" >> $syslogconf");
            my $cmd = service("syslog", "restart");
            NodeUtils->runcmd($cmd);
        }
        NodeUtils->touchFile($runfile);
    }
    elsif ($^O =~ /^aix/i)
    {
        my $errmsgque = "$::XCATROOT/lib/perl/xCAT_monitoring/rmc/errmsgque";

        # One attribute per line. The $1..$9 placeholders are written
        # literally (single-quoted) into the en_method command line.
        my $stanza =
            "\nerrnotify:\n"
          . "        en_pid = 0\n"
          . "        en_name = \"xcat_rmc_errlog_sensor\"\n"
          . "        en_persistenceflg = 1\n"
          . "        en_method = \"$errmsgque"
          . ' sequence = $1 error_id = $2 class = $3 type = $4 alert_flags = $5 res_name = $6 res_type = $7 res_class = $8 label = $9"'
          . "\n";

        open(my $odm_fh, '>', $odmstanza) or die $!;
        print {$odm_fh} $stanza or die $!;
        close($odm_fh) or die $!;
        NodeUtils->runcmd("/usr/bin/odmadd $odmstanza");
        NodeUtils->touchFile($runfile);
    }
    else
    {
        print "unknown platform\n";
        exit 1;
    }
}

#Check for errors

#see if at is running
verify_atd();    #TODO optimize this by not using at

if ($^O =~ /^linux/i)
{
    local $SIG{ALRM} = sub { die "alarm\n" };

    # Opening a FIFO for reading blocks until a writer shows up, so bound the
    # wait with an alarm.
    my $pipe;
    eval {
        alarm 4;
        open($pipe, '<', $fifo) or die "open failed\n";
        alarm 0;
    };
    my $open_err = $@;
    alarm 0;    # make sure no alarm stays pending after a failed open
    if ($open_err =~ /alarm/)
    {
        exit 0;
    }
    if ($open_err)
    {
        print "Could not open $fifo.\n";
        exit 1;
    }

    while (1)
    {
        my $line;
        eval {
            alarm 2;
            $line = <$pipe>;
            alarm 0;
        };
        if ($@ =~ /alarm/)
        {

            # Nothing arrived within the timeout: all pending entries have
            # been handled.
            close($pipe);
            exit 0;
        }

        # undef means EOF (the writer closed the pipe); stop instead of
        # spinning on an exhausted handle.
        last unless defined($line);

        chomp($line);

        #print "String=\"$line\"\n";
        refresh_sensor($line);
    }
    close($pipe);
}
elsif ($^O =~ /^aix/i)
{

    # the monitoring is stopped
    if ($stopping)
    {

        # stopsrc -s IBM.SensorRM will also
        # set $ENV{'SENSOR_MonitorStatus'} to 2
        # should not do clean up when IBM.SensorRM is stopped
        if (isRMrunning("IBM.SensorRM"))
        {

            # NOTE(review): the query uses "xcat_rmc_errlog_sens" while the
            # stanza is added as "xcat_rmc_errlog_sensor" -- presumably the
            # stored name is truncated; confirm before changing.
            NodeUtils->runcmd(
                "/bin/odmdelete -o errnotify -q \" en_name=xcat_rmc_errlog_sens\"",
                -1);
            if (-e $runfile)
            {
                unlink($runfile);
            }
        }
        exit 0;
    }

    # ord() only looks at the first character, so the project id is ord('x').
    # Presumably the sender derives its key the same way -- keep unchanged.
    my $m   = ord('xcat_rmc');
    my $key = IPC::SysV::ftok("/var/adm/ras/errlog", $m);
    if (!defined($key))
    {
        print "Could not create a message queue key.\n";
        exit 1;
    }

    my $buf;
    my $msg = IPC::Msg->new($key, IPC_CREAT | S_IRUSR | S_IWUSR);
    if (!defined($msg))
    {
        print "Could not open the message queue.\n";
        exit 1;
    }

    local $SIG{ALRM} = sub { die "alarm\n" };
    while (1)
    {
        my $rectype;
        eval {
            alarm 2;
            $rectype = $msg->rcv($buf, 256);
            alarm 0;
        };
        my $rcv_err = $@;
        alarm 0;
        if ($rcv_err =~ /alarm/)
        {

            # Queue drained: no message within the timeout.
            exit 0;
        }
        if ($rcv_err || !defined($rectype))
        {

            # A failed receive leaves $buf stale; stop rather than re-sending
            # it in an endless loop.
            exit 1;
        }
        refresh_sensor($buf);
    }
    exit 0;
}

#--------------------------------------------------------------------------------

=head3    shell_quote
        Wrap a string in single quotes for use as one shell word.
    Arguments:
        $text - the string to quote (undef is treated as an empty string)
    Returns:
        The quoted string; embedded single quotes are written as '\''.
=cut

#--------------------------------------------------------------------------------
sub shell_quote
{
    my ($text) = @_;
    $text = '' unless defined($text);
    $text =~ s/'/'\\''/g;
    return "'" . $text . "'";
}

#--------------------------------------------------------------------------------

=head3    refresh_sensor
        Queue an "at now" job that runs refsensor for ErrorLogSensor with the
        given text as its String value.
    Arguments:
        $text - the logged error text (comes from syslog or the AIX error
                message queue, so it is treated as untrusted)
    Returns:
        Whatever NodeUtils->runcmd returns.
=cut

#--------------------------------------------------------------------------------
sub refresh_sensor
{
    my ($text) = @_;

    # Two shells see the text: the one that feeds "at" and the one "at" uses
    # to run the job. Quote once per level, and use printf '%s' rather than
    # echo so backslash sequences in the text are not interpreted.
    my $job =
        "/usr/bin/refsensor ErrorLogSensor String="
      . shell_quote($text)
      . " 1>/dev/null 2>/dev/null";
    return NodeUtils->runcmd(
        "printf '%s\\n' " . shell_quote($job) . " | at now", 0);
}

#--------------------------------------------------------------------------------

=head3    verify_atd
        check for atd status. If it is not running, start it.
    Arguments:
    Returns:
        $::RUNCMD_RC = 0 atd is running
        $::RUNCMD_RC > 0 atd is not running
=cut

#--------------------------------------------------------------------------------
sub verify_atd
{
    my $cmd = service("atd", "status");
    NodeUtils->runcmd($cmd, -1);
    if ($::RUNCMD_RC)
    {
        $cmd = service("atd", "start");
        NodeUtils->runcmd($cmd, -1);
        if ($::RUNCMD_RC)
        {
            print "Warning: atd has failed to start!\n";
        }
        else
        {

            # Pause for a second after a successful start.
            sleep(1);
        }
    }
    return $::RUNCMD_RC;
}

#--------------------------------------------------------------------------------

=head3    service
        Send a service request to an init script.
    Arguments:
        $service - a service name such as 'inetd','xinetd'
        $svcarg  - arguments for the service such as 'start',
                   'stop' or 'status'.
    Returns:
        A full cli for the service script.
=cut

#--------------------------------------------------------------------------------
sub service
{
    my ($service, $svcarg) = @_;
    my $cmd;
    my $SVCCLI = "/sbin/service";
    my $SVCDIR = "/etc/init.d";

    #  On SLES, nfs server script is "nfsserver".
    if (((-e "/etc/SuSE-release") || NodeUtils->isHMC())
        && $service eq "nfs")
    {
        $service = "nfsserver";
    }
    if (-f $SVCCLI)
    {
        $cmd = "$SVCCLI $service $svcarg ";
    }
    else
    {
        $cmd = "$SVCDIR/$service $svcarg";
    }
    return $cmd;
}

exit 0;