mirror of
https://opendev.org/x/pyghmi
synced 2025-08-13 14:50:15 +00:00
Implement event log retrieval from BMCs
BMCs retain historical event data in the SEL. Implement code to read through the SEL. The processed data is also passed to the OEM framework for further processing, since OEMs may define a number of events. Note that it is not necessarily the OEM of the system that defines the OEM decode of the event. A timestamped OEM event may contain a different OEM id. This permits things like the system, the OS, agents, et al. to use the SEL to store various things. Change-Id: Ibfb07146b1dfa0ce06df863e805b5a30f17d2f18
This commit is contained in:
@@ -19,6 +19,7 @@
|
||||
import pyghmi.constants as const
|
||||
import pyghmi.exceptions as exc
|
||||
|
||||
import pyghmi.ipmi.events as sel
|
||||
import pyghmi.ipmi.fru as fru
|
||||
from pyghmi.ipmi.oem.lookup import get_oem_handler
|
||||
from pyghmi.ipmi.private import session
|
||||
@@ -296,6 +297,31 @@ class Command(object):
|
||||
raise exc.IpmiException(response['error'])
|
||||
return {'bootdev': bootdev}
|
||||
|
||||
def xraw_command(self, netfn, command, bridge_request=(), data=(),
                 delay_xmit=None):
    """Send a raw IPMI command to the BMC, raising on IPMI errors

    Behaves like raw_command, except that an error reported in the
    response is raised as an exception and the response data comes back
    as a buffer.  This is the recommended function to use.  A buffer
    supports ordinary indexed access and also cooperates with
    struct.unpack_from when structured data is returned.

    :param netfn: Net function number
    :param command: Command value
    :param bridge_request: The target slave address and channel number for
                           the bridge request.
    :param data: Command data as a tuple or list
    :param delay_xmit: Passed through unchanged to the session's
                       raw_command
    :returns: dict -- The response from IPMI device, with 'data' replaced
              by a buffer over the returned bytes
    :raises: IpmiException built from the response's 'error' and 'code'
    """
    response = self.ipmi_session.raw_command(
        netfn=netfn, command=command, bridge_request=bridge_request,
        data=data, delay_xmit=delay_xmit)
    if 'error' in response:
        raise exc.IpmiException(response['error'], response['code'])
    payload = bytearray(response['data'])
    response['data'] = buffer(payload)
    return response
|
||||
|
||||
def raw_command(self, netfn, command, bridge_request=(), data=(),
|
||||
delay_xmit=None):
|
||||
"""Send raw ipmi command to BMC
|
||||
@@ -364,6 +390,39 @@ class Command(object):
|
||||
if 'error' in response:
|
||||
raise exc.IpmiException(response['error'])
|
||||
|
||||
def init_sdr(self):
    """Return the sensor data repository for this management controller

    The SDR object is built on first use and the same object is handed
    back on every later call.
    """
    # Current policy: reuse the SDR we already hold.
    # Future: check the SDR timestamp for continued relevance.
    # Further future: optionally support a cache directory/file to store
    # cached copies for a given device id, product id, mfg id, sdr
    # timestamp, our data version revision, aux firmware revision, and
    # oem defined field.
    existing = self._sdr
    if existing is not None:
        return existing
    self._sdr = sdr.SDR(self)
    return self._sdr
|
||||
|
||||
def get_event_log(self, clear=False):
    """Fetch the BMC event log, optionally clearing it as well

    The SEL contents come back as an iterable.  Timestamps are rendered
    as local time in ISO 8601 form (whether the target has an accurate
    clock or not).  An event that cannot be given a timestamp carries
    only its raw timecode, which still provides relative time
    information.  With clear set to true the log is cleared as part of
    the same retrieval, so that no entries are lost between the fetch
    and the clear.  A separate 'clear_event_log' function is
    deliberately not offered, to encourage callers to write code that
    is not at risk of losing events.

    :param clear: Whether to remove the SEL entries from the target BMC
    """
    handler = sel.EventHandler(self.init_sdr())
    return handler.fetch_sel(self, clear)
|
||||
|
||||
def get_inventory_descriptions(self):
|
||||
"""Retrieve list of things that could be inventoried
|
||||
|
||||
|
475
pyghmi/ipmi/events.py
Normal file
475
pyghmi/ipmi/events.py
Normal file
@@ -0,0 +1,475 @@
|
||||
# vim: tabstop=4 shiftwidth=4 softtabstop=4
|
||||
|
||||
# Copyright 2015 Lenovo
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# __author__ = 'jjohnson2@lenovo.com'
|
||||
|
||||
import pyghmi.constants as pygconst
|
||||
import pyghmi.exceptions as pygexc
|
||||
import pyghmi.ipmi.private.constants as ipmiconst
|
||||
import struct
|
||||
import time
|
||||
|
||||
|
||||
# Lookup tables used by decode_eventdata to turn event data bytes into text.
# Keys are the numeric codes carried in the event data; any code missing from
# a table is reported by the caller as 'Unknown'.

# Power supply configuration error type (sensor type 8, offset 6,
# low nibble of the third event data byte)
psucfg_errors = {
    0: 'Vendor mismatch',
    1: 'Revision mismatch',
    2: 'Processor missing',  # e.g. pluggable CPU VRMs...
    3: 'Insufficient power',
    4: 'Voltage mismatch',
}

# Firmware progress / hang codes (sensor type 0xf, offsets 1 and 2)
firmware_progress = {
    0: 'Unspecified',
    1: 'Memory initialization',
    2: 'Disk initialization',
    3: 'Non-primary Processor initialization',
    4: 'User authentication',
    5: 'Entering setup',
    6: 'USB initialization',
    7: 'PCI initialization',
    8: 'Option ROM initialization',
    9: 'Video initialization',
    0xa: 'Cache initialization',
    0xb: 'SMBus initialization',
    0xc: 'Keyboard initialization',
    0xd: 'Embedded controller initialization',
    0xe: 'Docking station attachment',
    0xf: 'Docking station enabled',
    0x10: 'Docking station ejection',
    0x11: 'Docking station disabled',
    0x12: 'Waking OS',
    0x13: 'Starting OS boot',
    0x14: 'Baseboard initialization',
    0x16: 'Floppy initialization',
    0x17: 'Keyboard test',
    0x18: 'Pointing device test',
    0x19: 'Primary processor initialization',
}

# Firmware error codes (sensor type 0xf, offset 0)
firmware_errors = {
    0: 'Unspecified',
    1: 'No memory installed',
    2: 'All memory failed',
    3: 'Unrecoverable disk failure',
    4: 'Unrecoverable board failure',
    5: 'Unrecoverable diskette failure',
    6: 'Unrecoverable storage controller failure',
    7: 'Unrecoverable keyboard failure',  # Keyboard error, press
                                          # any key to continue..
    8: 'Removable boot media not found',
    9: 'Video adapter failure',
    0xa: 'No video device',
    0xb: 'Firmware corruption detected',
    0xc: 'CPU voltage mismatch',
    0xd: 'CPU speed mismatch',
}

# Auxiliary log actions (sensor type 0x12, offset 3)
auxlog_actions = {
    0: 'entry added',
    1: 'entry added (could not map to standard)',
    2: 'entry added with corresponding standard events',
    3: 'log cleared',
    4: 'log disabled',
    5: 'log enabled',
}

# System restart causes (sensor type 0x1d, offset 7)
restart_causes = {
    0: 'Unknown',
    1: 'Remote request',
    2: 'Reset button',
    3: 'Power button',
    4: 'Watchdog',
    5: 'OEM',
    6: 'Power restored',
    7: 'Power restored',
    8: 'Reset due to event',
    9: 'Cycle due to event',
    0xa: 'OS reset',
    0xb: 'Timer wake',
}

# Slot/connector types (sensor type 0x21, offset 9)
slot_types = {
    0: 'PCI',
    1: 'Drive Array',
    2: 'External connector',
    3: 'Docking',
    4: 'Other',
    5: 'Entity ID',
    6: 'AdvancedTCA',
    7: 'Memory',
    8: 'Fan',
    9: 'PCIe',
    10: 'SCSI',
    11: 'SATA/SAS',
}

# Power state names.  NOTE(review): decode_eventdata does not currently
# consult this table; presumably intended for the sensor type 0x19 decode.
power_states = {
    0: 'S0',
    1: 'S1',
    2: 'S2',
    3: 'S3',
    4: 'S4',
    5: 'S5',
    6: 'S4 or S5',
    7: 'G3',
    8: 'S1, S2, or S3',
    9: 'G1',
    0xa: 'S5',
    0xb: 'on',
    0xc: 'off',
}

# Watchdog timer use at expiration (sensor type 0x23)
watchdog_boot_phases = {
    1: 'Firmware',
    2: 'Firmware',
    3: 'OS Load',
    4: 'OS',
    5: 'OEM',
}

# What changed in a version change event (sensor type 0x2b)
version_changes = {
    1: 'Device ID',
    2: 'Management controller firmware',
    3: 'Management controller revision',
    4: 'Management controller manufacturer',
    5: 'IPMI version',
    6: 'Management controller firmware',
    7: 'Management controller boot block',
    8: 'Management controller firmware',
    9: 'System Firmware (UEFI/BIOS)',
    0xa: 'SMBIOS',
    0xb: 'OS',
    0xc: 'OS Loader',
    0xd: 'Diagnostics',
    0xe: 'Management agent',
    0xf: 'Management application',
    0x10: 'Management middleware',
    0x11: 'FPGA',
    0x12: 'FRU',
    0x13: 'FRU',
    0x14: 'Equivalent FRU',
    0x15: 'Updated FRU',
    0x16: 'Older FRU',
    0x17: 'Hardware (switch/jumper)',
}

# Cause of a FRU state change (sensor type 0x2c, high nibble of the second
# event data byte)
fru_states = {
    0: 'Normal',
    1: 'Externally requested',
    2: 'Latch',
    3: 'Hot swap',
    4: 'Internal action',
    5: 'Lost communication',
    6: 'Lost communication',
    7: 'Unexpected removal',
    8: 'Operator',
    9: 'Unable to compute IPMB address',
    0xa: 'Unexpected deactivation',
}
|
||||
|
||||
|
||||
def decode_eventdata(sensor_type, offset, eventdata, sdr):
    """Decode extra event data from an alert or log

    Provide a textual summary of eventdata per descriptions in
    Table 42-3 of the specification.  This is for sensor specific
    offset events only.

    :param sensor_type: The sensor type number from the event
    :param offset: Sensor specific offset
    :param eventdata: The three bytes from the log or alert
    :param sdr: SDR object; only its ``fru`` mapping is consulted, to name
                the FRU referenced by a sensor type 0x28 offset 5 event
    :returns: A string describing the extra data, or None when there is
              nothing further to say about the event
    """
    if sensor_type == 5 and offset == 4:  # link loss, indicates which port
        return 'Port {0}'.format(eventdata[1])
    elif sensor_type == 8 and offset == 6:  # PSU cfg error
        errtype = eventdata[2] & 0b1111
        return psucfg_errors.get(errtype, 'Unknown')
    elif sensor_type == 0xc and offset == 8:  # Memory spare
        return 'Module {0}'.format(eventdata[2])
    elif sensor_type == 0xf:
        if offset == 0:  # firmware error
            return firmware_errors.get(eventdata[1], 'Unknown')
        elif offset in (1, 2):
            return firmware_progress.get(eventdata[1], 'Unknown')
    elif sensor_type == 0x10:
        if offset == 0:  # Correctable error logging on a specific memory part
            return 'Module {0}'.format(eventdata[1])
        elif offset == 1:
            return 'Reading type {0:02X}h, offset {1:02X}h'.format(
                eventdata[1], eventdata[2] & 0b1111)
        elif offset == 5:
            return '{0}%'.format(eventdata[2])
        elif offset == 6:
            return 'Processor {0}'.format(eventdata[1])
    elif sensor_type == 0x12:
        if offset == 3:
            # The action lives in the whole upper nibble; masking only bits
            # 6..3 would fold out-of-range codes onto valid ones
            action = (eventdata[1] & 0b11110000) >> 4
            return auxlog_actions.get(action, 'Unknown')
        elif offset == 4:
            # One flag bit per action, reported from bit 5 down to bit 0
            sysactions = []
            if eventdata[1] & 0b1 << 5:
                sysactions.append('NMI')
            if eventdata[1] & 0b1 << 4:
                sysactions.append('OEM action')
            if eventdata[1] & 0b1 << 3:
                sysactions.append('Power Cycle')
            if eventdata[1] & 0b1 << 2:
                sysactions.append('Reset')
            if eventdata[1] & 0b1 << 1:
                sysactions.append('Power Down')
            if eventdata[1] & 0b1:
                sysactions.append('Alert')
            return ','.join(sysactions)
        elif offset == 5:  # Clock change event, either before or after
            if eventdata[1] & 0b10000000:
                return 'After'
            else:
                return 'Before'
    elif sensor_type == 0x19 and offset == 0:
        return 'Requested {0} while {1}'.format(eventdata[1], eventdata[2])
    elif sensor_type == 0x1d and offset == 7:
        return restart_causes.get(eventdata[1], 'Unknown')
    elif sensor_type == 0x21 and offset == 0x9:
        return '{0} {1}'.format(slot_types.get(eventdata[1], 'Unknown'),
                                eventdata[2])
    elif sensor_type == 0x23:
        phase = eventdata[1] & 0b1111
        return watchdog_boot_phases.get(phase, 'Unknown')
    elif sensor_type == 0x28:
        if offset == 4:
            return 'Sensor {0}'.format(eventdata[1])
        elif offset == 5:
            islogical = (eventdata[1] & 0b10000000)
            if islogical:
                if eventdata[2] in sdr.fru:
                    return sdr.fru[eventdata[2]].fru_name
                else:
                    return 'FRU {0}'.format(eventdata[2])
    elif sensor_type == 0x2a and offset == 3:
        return 'User {0}'.format(eventdata[1])
    elif sensor_type == 0x2b:
        return version_changes.get(eventdata[1], 'Unknown')
    elif sensor_type == 0x2c:
        cause = (eventdata[1] & 0b11110000) >> 4
        cause = fru_states.get(cause, 'Unknown')
        oldstate = eventdata[1] & 0b1111
        if oldstate != offset:
            try:
                cause += '(change from {0})'.format(
                    ipmiconst.sensor_type_offsets[0x2c][oldstate]['desc'])
            except KeyError:
                # Previous state has no known description; report the
                # cause alone
                pass
        return cause
    return None
|
||||
|
||||
|
||||
def _fix_sel_time(records, ipmicmd):
    """Add a local-time 'timestamp' to decoded SEL records, in place

    The BMC is asked for its current SEL time, and each record's age is
    taken relative to that reading rather than trusting the BMC clock as
    absolute.  Records are walked from the end of the list backwards so
    that 'Clock time change' Before/After pairs can adjust the reference
    for the records that precede them.

    Records without a 'timecode', or with timecode 0xffffffff, are left
    untouched.  Records with a timecode below 0x20000000 (treated as an
    uptime-style, pre-init timestamp) never receive a 'timestamp' here.

    :param records: List of event dicts as produced by the SEL decode;
                    modified in place
    :param ipmicmd: Command object providing xraw_command
    :raises: IpmiException if the time query fails with any completion
             code other than 0x81
    """
    timefetched = False
    rsp = None
    while not timefetched:
        try:
            rsp = ipmicmd.xraw_command(netfn=0xa, command=0x48)
            timefetched = True
        except pygexc.IpmiException as pi:
            # 0x81 is retried immediately -- presumably 'SEL erase in
            # progress'; TODO confirm against the specification
            if pi.ipmicode == 0x81:
                continue
            raise
    # The specification declares an epoch and all that, but we really don't
    # care.  We instead just focus on differences from the 'present'
    nowtime = struct.unpack_from('<I', rsp['data'])[0]
    correctednowtime = nowtime
    if nowtime < 0x20000000:
        correctearly = True
        inpreinit = True
    else:
        correctearly = False
        inpreinit = False
    newtimestamp = 0
    lasttimestamp = 0
    # NOTE(review): indexes of clock-change records are collected here but
    # nothing in this function consumes them -- confirm whether those
    # records were meant to be removed from the result
    trimindexes = []
    for index in reversed(range(len(records))):
        record = records[index]
        if 'timecode' not in record or record['timecode'] == 0xffffffff:
            continue
        # Timestamped OEM records carry a timecode but no description, so
        # the lookup must tolerate a missing key
        description = record.get('description')
        if description == 'Clock time change: After':
            newtimestamp = record['timecode']
            trimindexes.append(index)
        elif description == 'Clock time change: Before':
            if newtimestamp:
                if record['timecode'] < 0x20000000:
                    correctearly = True
                    nowtime = correctednowtime
                # NOTE(review): this moves the reference forward by the
                # size of the clock jump; check the sign, since older
                # records would seem to need the jump subtracted
                correctednowtime += newtimestamp - record['timecode']
                newtimestamp = 0
                trimindexes.append(index)
        else:
            # clean up after potentially broken time sync pairs
            newtimestamp = 0
            if record['timecode'] < 0x20000000:  # uptime timestamp
                if not correctearly:
                    correctednowtime = nowtime
                    continue
                # NOTE(review): iteration runs from the last record to the
                # first, so a smaller timecode than the previously seen
                # one looks like the normal case -- confirm this comparison
                if record['timecode'] < lasttimestamp:
                    # Time has gone backwards in pre-init, no hope for
                    # accurate time
                    correctearly = False
                    correctednowtime = nowtime
                    continue
                inpreinit = True
                lasttimestamp = record['timecode']
            else:
                # We are in 'normal' time, assume we cannot go to
                # pre-init time and do corrections unless time sync events
                # guide us in safely
                if inpreinit:
                    inpreinit = False
                    # We were in pre-init, now in real time, reset the
                    # time correction factor to the last stored
                    # 'wall clock' correction
                    correctednowtime = nowtime
                    correctearly = False
                if correctednowtime < 0x20000000:
                    # We can't correct time when the correction factor is
                    # rooted in a pre-init timestamp, just convert
                    record['timestamp'] = time.strftime(
                        '%Y-%m-%dT%H:%M:%S', time.localtime(
                            record['timecode']))
                else:
                    age = correctednowtime - record['timecode']
                    record['timestamp'] = time.strftime(
                        '%Y-%m-%dT%H:%M:%S', time.localtime(
                            time.time() - age))
|
||||
|
||||
|
||||
class EventHandler(object):
    """IPMI Event Processor

    This class provides facilities for processing alerts and event log
    data.  This can be used to aid in pulling historical event data
    from a BMC or as part of a trap handler to translate the traps into
    manageable data.

    :param sdr: An SDR object (per pyghmi.ipmi.sdr) matching the target BMC SDR
    """
    def __init__(self, sdr):
        self._sdr = sdr

    def _decode_standard_event(self, eventdata, event):
        """Fill *event* from the body of a standard format SEL record

        :param eventdata: The record bytes that follow the timestamp.
                          Byte 2 is the event message version, byte 3 the
                          sensor type, byte 4 the sensor number, byte 5
                          the direction bit plus event type, and the
                          remainder the event data bytes.
        :param event: Dict to populate; modified in place
        :raises: PyghmiException if the event message version is not 4
        """
        # Ignore the generator id for now..
        if eventdata[2] != 4:
            raise pygexc.PyghmiException(
                'Unrecognized Event message version {0}'.format(eventdata[2]))
        sensor_type = eventdata[3]
        try:
            event['entity'] = self._sdr.sensors[eventdata[4]].name
        except KeyError:
            event['entity'] = 'Sensor {0}'.format(eventdata[4])
        event['deassertion'] = (eventdata[5] & 0b10000000 == 0b10000000)
        event_data = eventdata[6:]
        event_type = eventdata[5] & 0b1111111
        # The top two bit pairs of the first data byte say how the second
        # and third data bytes are to be interpreted
        byte2type = (event_data[0] & 0b11000000) >> 6
        byte3type = (event_data[0] & 0b110000) >> 4
        if byte2type == 1:
            event['triggered_value'] = event_data[1]
        evtoffset = event_data[0] & 0b1111
        if event_type <= 0xc:
            # use generic offset decode for event description
            event['entity_type'] = ipmiconst.sensor_type_codes.get(
                sensor_type, '')
            evreading = ipmiconst.generic_type_offsets.get(
                event_type, {}).get(evtoffset, {})
            event['description'] = evreading.get('desc', '')
            event['severity'] = evreading.get('severity', pygconst.Health.Ok)
        elif event_type == 0x6f:
            event['entity_type'] = ipmiconst.sensor_type_codes.get(
                sensor_type, '')
            evreading = ipmiconst.sensor_type_offsets.get(
                sensor_type, {}).get(evtoffset, {})
            event['description'] = evreading.get('desc', '')
            event['severity'] = evreading.get('severity', pygconst.Health.Ok)
        if event_type == 1:  # threshold
            if byte3type == 1:
                event['threshold_value'] = event_data[2]
        if 3 in (byte2type, byte3type) or event_type == 0x6f:
            # sensor specific decode, see sdr module...
            # 2 - 0xc: generic discrete, 0x6f, sensor specific
            additionaldata = decode_eventdata(
                sensor_type, evtoffset, event_data, self._sdr)
            if additionaldata:
                if 'description' in event:
                    event['description'] = ': '.join((event['description'],
                                                      additionaldata))
                else:
                    # Event types outside the generic and sensor specific
                    # ranges were given no base description above
                    event['description'] = additionaldata

    def _sel_decode(self, origselentry):
        """Turn one raw SEL record into an event dict

        :param origselentry: The raw record bytes; byte 2 is the record
                             type that selects the layout
        :returns: dict -- 'timecode' plus decoded fields for a standard
                  record (type 2), 'timecode'/'oemid'/'oemdata' for types
                  0xc0-0xdf, only 'oemdata' for types 0xe0 and above, and
                  an empty dict for any other record type
        """
        selentry = bytearray(origselentry)
        event = {}
        if selentry[2] == 2 or (0xc0 <= selentry[2] <= 0xdf):
            # Either standard, or at least the timestamp is standard
            event['timecode'] = struct.unpack_from('<I', selentry[3:7])[0]
        if selentry[2] == 2:  # ipmi defined standard format
            self._decode_standard_event(selentry[7:], event)
        elif 0xc0 <= selentry[2] <= 0xdf:
            event['oemid'] = selentry[7:10]
            event['oemdata'] = selentry[10:]
        elif selentry[2] >= 0xe0:
            # In this class of OEM message, all bytes are OEM, interpretation
            # is wholly left up to the OEM layer, using the OEM ID of the BMC
            event['oemdata'] = selentry[3:]
        return event

    def _fetch_entries(self, ipmicmd, startat, targetlist, rsvid=0):
        """Read SEL records from *startat* until the end of the log

        Each decoded record is appended to *targetlist*.

        :param ipmicmd: The Command object to use to interrogate
        :param startat: Record id to begin reading from
        :param targetlist: List receiving the decoded records
        :param rsvid: Reservation id to present with each request
        :returns: The record id of the last request that was issued
        :raises: IpmiException for any failure other than completion
                 code 203, which simply ends the walk
        """
        curr = startat
        endat = curr
        while curr != 0xffff:
            endat = curr
            # 0xff00 packs little-endian as byte 0x00 followed by byte 0xff
            reqdata = bytearray(struct.pack('<HHH', rsvid, curr, 0xff00))
            try:
                rsp = ipmicmd.xraw_command(
                    netfn=0xa, command=0x43, data=reqdata)
            except pygexc.IpmiException as pi:
                if pi.ipmicode == 203:
                    break
                # Anything else must propagate; carrying on would reuse a
                # stale or unbound response
                raise
            curr = struct.unpack_from('<H', rsp['data'][:2])[0]
            targetlist.append(self._sel_decode(rsp['data'][2:]))
        return endat

    def fetch_sel(self, ipmicmd, clear=False):
        """Fetch SEL entries

        Return an iterable of SEL entries.  If clearing is requested,
        the fetch and clear will be done as an atomic operation, assuring
        no entries are dropped.

        :param ipmicmd: The Command object to use to interrogate
        :param clear: Whether to clear the entries upon retrieval.
        :returns: List of event dicts, with timestamps added where they
                  could be determined
        """
        records = []
        # First we do a fetch all without reservation, reducing the risk
        # of having a long lived reservation that gets canceled in the middle
        endat = self._fetch_entries(ipmicmd, 0, records)
        if clear and records:  # don't bother clearing if there were no records
            # To do clear, we make a reservation first...
            rsp = ipmicmd.xraw_command(netfn=0xa, command=0x42)
            rsvid = struct.unpack_from('<H', rsp['data'])[0]
            # Then we refetch the tail with reservation (check for change)
            del records[-1]  # remove the record that's about to be duplicated
            self._fetch_entries(ipmicmd, endat, records, rsvid)
            # finally clear the SEL
            # 0XAA means start initiate, 0x524c43 is 'RCL' or 'CLR' backwards
            clrdata = bytearray(struct.pack('<HI', rsvid, 0xAA524C43))
            ipmicmd.xraw_command(netfn=0xa, command=0x47, data=clrdata)
        # Now to fixup the record timestamps... first we need to get the BMC
        # opinion of current time
        _fix_sel_time(records, ipmicmd)
        return records
|
@@ -31,7 +31,7 @@ payload_types = {
|
||||
'rakp4': 0x15,
|
||||
}
|
||||
|
||||
#sensor type codes, table 42-3
|
||||
# sensor type codes, table 42-3
|
||||
sensor_type_codes = {
|
||||
1: 'Temperature',
|
||||
2: 'Voltage',
|
||||
@@ -80,11 +80,70 @@ sensor_type_codes = {
|
||||
}
|
||||
|
||||
# This is from table 42-2
|
||||
#digital discrete poses a challenge from a health perspective. So far all
|
||||
#observed ones are no more or less 'healthy' by being asserted or not asserted
|
||||
#for example asserting that an add-on is installed
|
||||
# digital discrete poses a challenge from a health perspective. So far all
|
||||
# observed ones are no more or less 'healthy' by being asserted or not asserted
|
||||
# for example asserting that an add-on is installed
|
||||
|
||||
discrete_type_offsets = {
|
||||
generic_type_offsets = {
|
||||
1: { # threshold based
|
||||
# Some explanation is offered in the specification around 'get sensor
|
||||
# event status' command. Assertions should indicate the new state
|
||||
# and deassertion should indicate leaving the state. The 'going high'
|
||||
# and 'going low' do not denote leaving a state. For example, going
|
||||
# from Lower Critical to Lower Non-Critical would be 'going high'
|
||||
# Will just report the new state rather than the direction from
|
||||
# which things came from, since in a vacuum it is not useful data
|
||||
# and in the context of event log, it should be better discerned
|
||||
# from the pattern of prior events
|
||||
0: {
|
||||
'desc': 'Lower Non-critical', # - going low',
|
||||
'severity': const.Health.Warning,
|
||||
},
|
||||
1: {
|
||||
'desc': 'Lower Non-critical', # - going high',
|
||||
'severity': const.Health.Warning,
|
||||
},
|
||||
2: {
|
||||
'desc': 'Lower Critical', # - going low',
|
||||
'severity': const.Health.Critical,
|
||||
},
|
||||
3: {
|
||||
'desc': 'Lower Critical', # - going high',
|
||||
'severity': const.Health.Critical,
|
||||
},
|
||||
4: {
|
||||
'desc': 'Lower Non-recoverable', # - going low
|
||||
'severity': const.Health.Failed,
|
||||
},
|
||||
5: {
|
||||
'desc': 'Lower Non-recoverable', # - going high
|
||||
'severity': const.Health.Failed,
|
||||
},
|
||||
6: {
|
||||
'desc': 'Upper Non-critical', # - going low
|
||||
'severity': const.Health.Warning,
|
||||
},
|
||||
7: {
|
||||
'desc': 'Upper Non-critical', # - going high
|
||||
'severity': const.Health.Warning,
|
||||
},
|
||||
8: {
|
||||
'desc': 'Upper Critical', # - going low
|
||||
'severity': const.Health.Critical,
|
||||
},
|
||||
9: {
|
||||
'desc': 'Upper Critical', # - going high
|
||||
'severity': const.Health.Critical,
|
||||
},
|
||||
0xa: {
|
||||
'desc': 'Upper non-recoverable', # - going low
|
||||
'severity': const.Health.Failed,
|
||||
},
|
||||
0xb: {
|
||||
'desc': 'Upper non-recoverable', # - going high
|
||||
'severity': const.Health.Failed,
|
||||
},
|
||||
},
|
||||
2: {
|
||||
0: {
|
||||
'desc': 'Idle',
|
||||
@@ -350,31 +409,31 @@ sensor_type_offsets = {
|
||||
},
|
||||
7: {
|
||||
0: {
|
||||
'desc': 'processor IERR',
|
||||
'desc': 'Processor IERR',
|
||||
'severity': const.Health.Failed,
|
||||
},
|
||||
1: {
|
||||
'desc': 'processor thermal trip',
|
||||
'desc': 'Processor thermal trip',
|
||||
'severity': const.Health.Failed,
|
||||
},
|
||||
2: {
|
||||
'desc': 'processor FRB1/BIST failure',
|
||||
'desc': 'Processor FRB1/BIST failure',
|
||||
'severity': const.Health.Failed,
|
||||
},
|
||||
3: {
|
||||
'desc': 'processor FRB2/Hang in POST failure',
|
||||
'desc': 'Processor FRB2/Hang in POST failure',
|
||||
'severity': const.Health.Failed,
|
||||
},
|
||||
4: {
|
||||
'desc': 'processor FRB3/processor startup failure',
|
||||
'desc': 'Processor FRB3/processor startup failure',
|
||||
'severity': const.Health.Failed,
|
||||
},
|
||||
5: {
|
||||
'desc': 'processor configuration error',
|
||||
'desc': 'Processor configuration error',
|
||||
'severity': const.Health.Failed,
|
||||
},
|
||||
6: {
|
||||
'desc': 'uncorrectable cpu complex error',
|
||||
'desc': 'Uncorrectable cpu complex error',
|
||||
'severity': const.Health.Failed,
|
||||
},
|
||||
7: {
|
||||
@@ -386,19 +445,19 @@ sensor_type_offsets = {
|
||||
'severity': const.Health.Warning,
|
||||
},
|
||||
9: {
|
||||
'desc': 'processor terminator presence detected',
|
||||
'desc': 'Processor terminator presence detected',
|
||||
'severity': const.Health.Ok,
|
||||
},
|
||||
0xa: {
|
||||
'desc': 'processor throttled',
|
||||
'desc': 'Processor throttled',
|
||||
'severity': const.Health.Warning,
|
||||
},
|
||||
0xb: {
|
||||
'desc': 'uncorrectable machine check exception',
|
||||
'desc': 'Uncorrectable machine check exception',
|
||||
'severity': const.Health.Failed,
|
||||
},
|
||||
0xc: {
|
||||
'desc': 'correctable machine check exception',
|
||||
'desc': 'Correctable machine check exception',
|
||||
'severity': const.Health.Warning,
|
||||
},
|
||||
},
|
||||
@@ -408,28 +467,28 @@ sensor_type_offsets = {
|
||||
'severity': const.Health.Ok,
|
||||
},
|
||||
1: {
|
||||
'desc': 'power supply failure',
|
||||
'desc': 'Power supply failure',
|
||||
'severity': const.Health.Critical,
|
||||
},
|
||||
2: {
|
||||
'desc': 'power supply predictive failure',
|
||||
'desc': 'Power supply predictive failure',
|
||||
'severity': const.Health.Critical,
|
||||
},
|
||||
3: {
|
||||
'desc': 'power supply input lost',
|
||||
'desc': 'Power supply input lost',
|
||||
'severity': const.Health.Critical,
|
||||
},
|
||||
4: {
|
||||
'desc': 'power supply input out of range or lost',
|
||||
'desc': 'Power supply input out of range or lost',
|
||||
'severity': const.Health.Critical,
|
||||
},
|
||||
5: {
|
||||
'desc': 'power supply input out of range',
|
||||
'desc': 'Power supply input out of range',
|
||||
'severity': const.Health.Critical,
|
||||
},
|
||||
6: {
|
||||
# clarified by SEL/PET event data 3
|
||||
'desc': 'power supply configuration error',
|
||||
'desc': 'Power supply configuration error',
|
||||
'severity': const.Health.Warning,
|
||||
},
|
||||
7: {
|
||||
@@ -439,11 +498,11 @@ sensor_type_offsets = {
|
||||
},
|
||||
9: { # power unit
|
||||
0: {
|
||||
'desc': 'power off/down',
|
||||
'desc': 'Power off',
|
||||
'severity': const.Health.Ok,
|
||||
},
|
||||
1: {
|
||||
'desc': 'power cycle',
|
||||
'desc': 'Power cycle',
|
||||
'severity': const.Health.Ok,
|
||||
},
|
||||
2: {
|
||||
@@ -451,19 +510,19 @@ sensor_type_offsets = {
|
||||
'severity': const.Health.Warning,
|
||||
},
|
||||
3: {
|
||||
'desc': 'interlock power down',
|
||||
'desc': 'Interlock power down',
|
||||
'severity': const.Health.Ok,
|
||||
},
|
||||
4: {
|
||||
'desc': 'power input lost',
|
||||
'desc': 'Power input lost',
|
||||
'severity': const.Health.Warning,
|
||||
},
|
||||
5: {
|
||||
'desc': 'soft power control failure',
|
||||
'desc': 'Soft power control failure',
|
||||
'severity': const.Health.Failed,
|
||||
},
|
||||
6: {
|
||||
'desc': 'power unit failure',
|
||||
'desc': 'Power unit failure',
|
||||
'severity': const.Health.Critical,
|
||||
},
|
||||
7: {
|
||||
@@ -473,27 +532,27 @@ sensor_type_offsets = {
|
||||
},
|
||||
0xc: { # memory
|
||||
0: {
|
||||
'desc': 'correctable memory error',
|
||||
'desc': 'Correctable memory error',
|
||||
'severity': const.Health.Warning,
|
||||
},
|
||||
1: {
|
||||
'desc': 'uncorrectable memory error',
|
||||
'desc': 'Uncorrectable memory error',
|
||||
'severity': const.Health.Failed,
|
||||
},
|
||||
2: {
|
||||
'desc': 'memory parity',
|
||||
'desc': 'Memory parity',
|
||||
'severity': const.Health.Warning,
|
||||
},
|
||||
3: {
|
||||
'desc': 'memory scrub failed',
|
||||
'desc': 'Memory scrub failed',
|
||||
'severity': const.Health.Critical,
|
||||
},
|
||||
4: {
|
||||
'desc': 'memory device disabled',
|
||||
'desc': 'Memory device disabled',
|
||||
'severity': const.Health.Warning,
|
||||
},
|
||||
5: {
|
||||
'desc': 'correctable memory error logging limit reached',
|
||||
'desc': 'Correctable memory error logging limit reached',
|
||||
'severity': const.Health.Critical,
|
||||
},
|
||||
6: {
|
||||
@@ -523,35 +582,35 @@ sensor_type_offsets = {
|
||||
'severity': const.Health.Ok,
|
||||
},
|
||||
1: {
|
||||
'desc': 'drive fault',
|
||||
'desc': 'Drive fault',
|
||||
'severity': const.Health.Critical,
|
||||
},
|
||||
2: {
|
||||
'desc': 'predictive drive failure',
|
||||
'desc': 'Predictive drive failure',
|
||||
'severity': const.Health.Warning,
|
||||
},
|
||||
3: {
|
||||
'desc': 'hot spare drive',
|
||||
'desc': 'Hot spare drive',
|
||||
'severity': const.Health.Ok,
|
||||
},
|
||||
4: {
|
||||
'desc': 'drive consitency check in progress',
|
||||
'desc': 'Drive consitency check in progress',
|
||||
'severity': const.Health.Ok,
|
||||
},
|
||||
5: {
|
||||
'desc': 'drive in critical array',
|
||||
'desc': 'Drive in critical array',
|
||||
'severity': const.Health.Critical,
|
||||
},
|
||||
6: {
|
||||
'desc': 'drive in failed array',
|
||||
'desc': 'Drive in failed array',
|
||||
'severity': const.Health.Failed,
|
||||
},
|
||||
7: {
|
||||
'desc': 'rebuild in progress',
|
||||
'desc': 'Rebuild in progress',
|
||||
'severity': const.Health.Ok,
|
||||
},
|
||||
8: {
|
||||
'desc': 'rebuild aborted',
|
||||
'desc': 'Rebuild aborted',
|
||||
'severity': const.Health.Critical,
|
||||
},
|
||||
},
|
||||
@@ -1103,10 +1162,43 @@ sensor_type_offsets = {
|
||||
'severity': const.Health.Ok,
|
||||
},
|
||||
},
|
||||
0x2c: { # FRU state
|
||||
0: {
|
||||
'desc': 'Not Installed',
|
||||
'severity': const.Health.Ok,
|
||||
},
|
||||
1: {
|
||||
'desc': 'Inactive',
|
||||
'severity': const.Health.Ok,
|
||||
},
|
||||
2: {
|
||||
'desc': 'Activation Requested',
|
||||
'severity': const.Health.Ok,
|
||||
},
|
||||
3: {
|
||||
'desc': 'Activation in progress',
|
||||
'severity': const.Health.Ok,
|
||||
},
|
||||
4: {
|
||||
'desc': 'Active',
|
||||
'severity': const.Health.Ok,
|
||||
},
|
||||
5: {
|
||||
'desc': 'Deactivation requested',
|
||||
'severity': const.Health.Ok,
|
||||
},
|
||||
6: {
|
||||
'desc': 'Deactivation in progress',
|
||||
'severity': const.Health.Ok,
|
||||
},
|
||||
7: {
|
||||
'desc': 'Communication Lost',
|
||||
'severity': const.Health.Warning,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
#entity ids from table 43-13 entity id codes
|
||||
# entity ids from table 43-13 entity id codes
|
||||
entity_ids = {
|
||||
0x0: 'unspecified',
|
||||
0x1: 'other',
|
||||
|
@@ -378,7 +378,7 @@ class SDREntry(object):
|
||||
self.decode_formula(entry[19:25])
|
||||
|
||||
def _decode_state(self, state):
|
||||
mapping = ipmiconst.discrete_type_offsets
|
||||
mapping = ipmiconst.generic_type_offsets
|
||||
try:
|
||||
if self.reading_type in mapping:
|
||||
desc = mapping[self.reading_type][state]['desc']
|
||||
|
Reference in New Issue
Block a user