#!/usr/bin/python3

# frequent problems to check/repair
# repair would be to set the uuid global to match filesystem for least
# disruptive change
#
#

import glob
import os
import shutil
import socket
import ssl
import subprocess
import sys
import tempfile

import confluent.certutil as certutil
import confluent.config.configmanager as configmanager
import confluent.sshutil as sshutil


def fprint(txt):
    """Write txt to stdout without a trailing newline and flush right away.

    Used for the 'Label: ' prefix of each check so that it is visible
    while the check itself is still running.
    """
    sys.stdout.write(txt)
    sys.stdout.flush()


def tftp_works():
    """Return True if curl can fetch ipxe.efi from the local TFTP server."""
    try:
        subprocess.check_call(
            ['curl', '--connect-timeout', '2', '-sf',
             'tftp://localhost/confluent/x86_64/ipxe.efi',
             '-o', '/dev/null'])
        return True
    except Exception:
        # Any failure (curl not runnable, non-zero exit status, ...) is
        # reported as TFTP not working rather than aborting the self check.
        return False


def emprint(txt):
    """Print txt, bold and underlined when stdout is a terminal."""
    if sys.stdout.isatty():
        print('\x1b[1m\x1b[4m' + txt + '\x1b[0m')
    else:
        print(txt)


def deployment_configured():
    """Return True if the site confluent_uuid file exists."""
    return os.path.exists('/var/lib/confluent/public/site/confluent_uuid')


def webserver_listening():
    """Try to connect to localhost:443.

    Returns the connected socket on success, or False if the connection
    could not be established.
    """
    try:
        conn = socket.create_connection(('localhost', 443))
        return conn
    except Exception:
        return False


def certificates_missing_ips(conn):
    """Return the local IP addresses absent from the web server certificate.

    conn is a connected socket to the web server.  It is wrapped in TLS
    and closed by this function, so the caller must not reuse it.

    Raises whatever the TLS handshake raises (for example ssl.SSLError
    when the certificate does not verify against the site CA files).
    """
    # check if the tls can verify by the right CAs, then further
    # check if all ip addresses are in the certificate offered
    ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    ctx.check_hostname = False
    for cacert in glob.glob('/var/lib/confluent/public/site/tls/*.pem'):
        ctx.load_verify_locations(cacert)
    # wrap_socket takes ownership of conn; the with block makes sure the
    # connection is closed once the certificate has been retrieved
    with ctx.wrap_socket(conn) as sock:
        crt = sock.getpeercert()
    # Normalize the certificate addresses once, instead of once per local ip
    certips = set()
    for field, val in crt.get('subjectAltName', []):
        if field != 'IP Address':
            # DNS/URI/etc. entries can never equal an ip address, and
            # feeding e.g. a URI to getaddrinfo below could fail outright
            continue
        # some python versions leave a trailing newline on the value
        val = val.rstrip('\n')
        if ':' in val:
            # must normalize ipv6 to a sane value
            val = socket.getaddrinfo(
                val, 443, type=socket.SOCK_STREAM)[0][-1][0]
        certips.add(val)
    return [ip for ip in certutil.get_ip_addresses() if ip not in certips]


def is_ipv6_enabled():
    """Placeholder, not implemented yet; currently always returns None."""
    # check for ability to create AF_INET6, for kernel disabled ipv6
    pass

# for every interface with an ipv4 address, check if there's an fe80 as well
# warn that os deployment and discovery services may be impacted for afflicted
# interface

# check for http access to confluent-public, use site.cpio as file to check?

# check for deployment.useinsecureprotocols=firmware


def uuid_matches():
    """Return True if the uuid on the filesystem equals the database global."""
    with open('/var/lib/confluent/public/site/confluent_uuid', 'r') as uuidf:
        fsuuid = uuidf.read().strip()
    dbuuid = configmanager.get_global('confluent_uuid')
    return dbuuid == fsuuid


def main():
    """Run every self check in order, printing one status line per check."""
    fprint('OS Deployment: ')
    if not deployment_configured():
        print("Uninitialized, further OS deployment checks skipped, see `osdeploy initialize` to set up OS deployment feature")
        return
    print("Initialized")
    fprint('Confluent UUID: ')
    if uuid_matches():
        print('Consistent')
    else:
        #TODO: need a resolution to suggest
        emprint('Inconsistent between confluent database and /var/lib/confluent')
    fprint('Web Server: ')
    conn = webserver_listening()
    if conn:
        print('Running')
        fprint('Web Certificate: ')
        # certificates_missing_ips closes conn when it is done with it
        cert = certificates_missing_ips(conn)
        if cert:
            cert = ', '.join(cert)
            emprint('Addresses missing from certificate: {0} (Example resolution: osdeploy initialize -t)'.format(cert))
        else:
            print('OK')
    else:
        emprint('Not Running (Example resolution: systemctl enable httpd --now)')
    fprint('TFTP Status: ')
    if tftp_works():
        print('OK')
    else:
        emprint('TFTP failure, PXE will not work, though media and HTTP boot can still work. (Example resolution: osdeploy initialize -p)')
    fprint('SSH root user public key: ')
    if glob.glob('/var/lib/confluent/public/site/ssh/*.rootpubkey'):
        print('OK')
    else:
        emprint('No trusted ssh keys for root user, passwordless SSH from managers to nodes may not work (Example resolution: osdeploy initialize -u)')
    if sshutil.sshver() > 7.6:
        fprint('Checking SSH Certificate authority: ')
        try:
            sshutil.prep_ssh_key('/etc/confluent/ssh/ca')
        except Exception:
            emprint('Failed to load SSH authority key, deployed servers will not have host certificates for known_hosts and users may be unable to ssh between nodes without a password (Example resolution: osdeploy initialize -s)')
        else:
            # Terminate the status line on success as the other checks do
            print('OK')
    fprint('Checking confluent SSH automation key: ')
    try:
        sshutil.prep_ssh_key('/etc/confluent/ssh/automation')
    except subprocess.CalledProcessError:
        emprint('Failed to load confluent automation key, syncfiles and profile ansible plays will not work (Example resolution: osdeploy initialize -a)')
    else:
        # Terminate the status line on success as the other checks do
        print('OK')


if __name__ == '__main__':
    main()