2
0
mirror of https://github.com/xcat2/confluent.git synced 2024-11-22 17:43:14 +00:00
confluent/confluent_server/bin/osdeploy
Jarrod Johnson 4fe84ca6dc Fix various issues in 3.0.0 release
If the kernel is new enough to do SKU, but the firmware doesn't have it,
fallback to model.

Fix outright mistakes in the config_port_tsm code

Up mac count from 2 to 3. If querying cumulus switches using SNMP
the switch will add its own mac to the list bringing the
total for a shared port to 3.
2020-09-09 10:58:37 -04:00

342 lines
15 KiB
Python

#!/usr/bin/python2
import argparse
import glob
import os
import os.path
import pwd
import shutil
import sys
import time
path = os.path.dirname(os.path.realpath(__file__))
path = os.path.realpath(os.path.join(path, '..', 'lib', 'python'))
if path.startswith('/opt'):
sys.path.append(path)
import confluent.collective.manager as collective
import eventlet.green.subprocess as subprocess
import confluent.selfservice as selfservice
import confluent.util as util
import confluent.client as client
import confluent.sshutil as sshutil
import confluent.certutil as certutil
try:
input = raw_input
except NameError:
pass
def emprint(txt):
if sys.stdout.isatty():
print('\x1b[1m\x1b[4m' + txt + '\x1b[0m')
else:
print(txt)
fnamechars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.^'
def main(args):
ap = argparse.ArgumentParser(description='Manage OS deployment resources')
sp = ap.add_subparsers(dest='command')
wiz = sp.add_parser('initialize', help='Do OS deployment preparation')
wiz.add_argument('-g', help='Initialize a Genesis profile to boot systems into a rescue or staging environment', action='store_true')
wiz.add_argument('-u', help='Pull in root user key for node deployment', action='store_true')
wiz.add_argument('-s', help='Set up SSH CA for managing node to node ssh and known hosts', action='store_true')
wiz.add_argument('-k', help='Update local global known hosts file with confluent CA', action='store_true')
wiz.add_argument('-t', help='Generate new TLS key for HTTPS operation and register with confluent repository', action='store_true')
wiz.add_argument('-p', help='Copy in TFTP contents required for PXE support', action='store_true')
wiz.add_argument('-i', help='Interactively prompt for behaviors', action='store_true')
wiz.add_argument('-l', help='Set up local management node to allow login from managed nodes', action='store_true')
osip = sp.add_parser('import', help='Import an OS image from an ISO image')
osip.add_argument('imagefile', help='File to use for source of importing')
upb = sp.add_parser(
'updateboot',
help='Push profile.yaml of the named profile data into boot assets as appropriate')
upb.add_argument('profile', help='Profile to update boot assets')
cmdset = ap.parse_args()
if cmdset.command == 'import':
return osimport(cmdset.imagefile)
if cmdset.command == 'initialize':
return initialize(cmdset)
if cmdset.command == 'updateboot':
return updateboot(cmdset.profile)
ap.print_help()
def initialize_genesis():
if not os.path.exists('/opt/confluent/genesis/x86_64/boot/kernel'):
emprint('Install the confluent-genesis package to have the '
'resources for a genesis profile')
return 1
hasconfluentuser = None
try:
hasconfluentuser = pwd.getpwnam('confluent')
except KeyError:
pass
pid = os.fork()
if pid:
retval = os.waitpid(pid, 0)
return retval[1]
retcode = 0
try:
if hasconfluentuser:
os.setgid(hasconfluentuser.pw_gid)
os.setuid(hasconfluentuser.pw_uid)
os.umask(0o22)
os.makedirs('/var/lib/confluent/public/os/genesis-x86_64/boot/efi/boot', 0o755)
os.makedirs('/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs', 0o755)
os.symlink('/opt/confluent/genesis/x86_64/boot/efi/boot/BOOTX64.EFI',
'/var/lib/confluent/public/os/genesis-x86_64/boot/efi/boot/BOOTX64.EFI')
os.symlink('/opt/confluent/genesis/x86_64/boot/efi/boot/grubx64.efi',
'/var/lib/confluent/public/os/genesis-x86_64/boot/efi/boot/grubx64.efi')
os.symlink('/opt/confluent/genesis/x86_64/boot/initramfs/distribution',
'/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs/distribution')
os.symlink('/var/lib/confluent/public/site/initramfs.cpio',
'/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs/site.cpio')
os.symlink('/opt/confluent/lib/osdeploy/genesis/initramfs/addons.cpio',
'/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs/addons.cpio')
os.symlink('/opt/confluent/genesis/x86_64/boot/kernel',
'/var/lib/confluent/public/os/genesis-x86_64/boot/kernel')
shutil.copytree('/opt/confluent/lib/osdeploy/genesis/profiles/default/scripts/',
'/var/lib/confluent/public/os/genesis-x86_64/scripts/')
shutil.copyfile('/opt/confluent/lib/osdeploy/genesis/profiles/default/profile.yaml',
'/var/lib/confluent/public/os/genesis-x86_64/profile.yaml')
except Exception:
retcode = 1
finally:
os._exit(retcode)
def local_node_trust_setup():
allnodes, domain = selfservice.get_cluster_list()
myname = collective.get_myname()
myprincipals = set([myname])
restorecon = os.path.exists('/usr/sbin/restorecon')
neededlines = set([
'HostbasedAuthentication yes', 'HostbasedUsesNameFromPacketOnly yes',
'IgnoreRhosts no'])
if domain and not myname.endswith(domain):
myprincipals.add('{0}.{1}'.format(myname, domain))
if domain and '.' in myname and myname.endswith(domain):
myprincipals.add(myname.split('.')[0])
for pubkey in glob.glob('/etc/ssh/ssh_host_*_key.pub'):
currpubkey = open(pubkey, 'rb').read()
cert = sshutil.sign_host_key(currpubkey, myname, myprincipals)
certfile = pubkey.replace('key.pub', 'key-cert.pub')
neededlines.add('HostCertificate {0}'.format(certfile))
if os.path.exists(certfile):
os.unlink(certfile)
with open(certfile, 'w') as certout:
certout.write(cert)
if restorecon:
subprocess.check_call(['/usr/sbin/restorecon', certfile])
with open('/etc/ssh/sshd_config', 'r') as sshconf:
currconfig = sshconf.read().split('\n')
for conline in currconfig:
conline = conline.strip()
neededlines.discard(conline)
if neededlines:
with open('/etc/ssh/sshd_config', 'a') as cfgout:
for currline in neededlines:
cfgout.write(currline)
cfgout.write('\n')
with open('/etc/ssh/shosts.equiv', 'w') as equivout:
for node in util.natural_sort(allnodes):
equivout.write(node + '\n')
with open('/root/.shosts', 'w') as equivout:
for node in util.natural_sort(allnodes):
equivout.write(node + '\n')
if restorecon:
subprocess.check_call(
['/usr/sbin/restorecon',
'/etc/ssh/shosts.equiv', '/root/.shosts'])
def install_tftp_content():
tftplocation = None
candidates = ('/tftpboot', '/var/lib/tftpboot', '/srv/tftpboot', '/srv/tftp')
for cand in candidates:
if os.path.isdir(cand):
tftplocation = cand
break
if not tftplocation:
emprint('Unable to detect a directory for tftp content (check that tftp server is installed)')
return 1
if os.path.exists('/usr/lib/systemd/system/tftp.socket'):
if tftplocation == '/tftpboot':
emprint('/tftpboot is detected as tftp directory, will not try to automatically enable tftp, as it is presumed to be externally managed')
else:
try:
subprocess.check_call(['systemctl', 'enable', 'tftp.socket', '--now'])
print('TFTP service is enabled and running')
except Exception:
emprint('Unable to automatically enable and start tftp.socket, tftp server may already be running outside of systemd control')
else:
emprint(
'Detected {0} as tftp directory, but unable to determine tftp service, ensure that a tftp server is installed and enabled manually'.format(tftplocation))
tftplocation = '{0}/confluent/x86_64'.format(tftplocation)
try:
os.makedirs(tftplocation)
except OSError as e:
if e.errno != 17:
raise
shutil.copy('/opt/confluent/lib/ipxe/ipxe.efi', tftplocation)
shutil.copy('/opt/confluent/lib/ipxe/ipxe.kkpxe', tftplocation)
def initialize(cmdset):
if os.getuid() != 0:
sys.stderr.write('This command must run as root user\n')
sys.exit(1)
if cmdset.i:
didsomething = True
sys.stdout.write('Add root user key to be authorized to log into nodes (-u)? (y/n): ')
sys.stdout.flush()
cmdset.u = input().strip().lower().startswith('y')
sys.stdout.write('Initialize a profile to boot Genesis on target systems (a small Linux environment for rescue and staging use)? (y/n): ')
cmdset.g = input().strip().lower().startswith('y')
sys.stdout.write('Set up an SSH authority to help manage known_hosts and node to node ssh for all users (-s)? (y/n): ')
cmdset.s = input().strip().lower().startswith('y')
sys.stdout.write('Update global known hosts on this server to trust local CA certificates (-k)? (y/n): ')
cmdset.k = input().strip().lower().startswith('y')
sys.stdout.write('Allow managed nodes to ssh to this management node without a password (-l)? (y/n): ')
cmdset.l = input().strip().lower().startswith('y')
sys.stdout.write('Update tftp directory with binaries to support PXE (-p) (y/n): ')
cmdset.p = input().strip().lower().startswith('y')
sys.stdout.write('Generate new TLS certificates for HTTP, replacing any existing certificate (-t)? (y/n): ')
cmdset.t = input().strip().lower().startswith('y')
if not cmdset.t:
print(
'In order to use your own certificate authority, make sure '
'to put the certificate authority into '
'/var/lib/confluent/public/site/tls/ directory as a .pem file '
'as well as named (hash).0 where (hash) is the hash of the '
'subject.')
else:
didsomething = False
if cmdset.u:
didsomething = True
if not glob.glob('/root/.ssh/*.pub'):
sys.stderr.write('No user keys for root detected, it is recommended '
'to run ssh-keygen -t ed25519 to generate a user '
'key. For optimal security, a passphrase should be '
'used. ssh-agent may be used to make use of a '
'passphrase protected ssh key easier.\n')
sys.exit(1)
sshutil.initialize_root_key(False)
if cmdset.t:
didsomething = True
certutil.create_certificate()
if os.path.exists('/usr/lib/systemd/system/httpd.service'):
subprocess.check_call(['systemctl', 'try-restart', 'httpd'])
print('HTTP server has been restarted if it was running')
elif os.path.exists('/usr/lib/systemd/system/apache2.service'):
subprocess.check_call(['systemctl', 'try-restart', 'apache2'])
print('HTTP server has been restarted if it was running')
else:
emprint('New HTTPS certificates generated, restart the web server manually')
if cmdset.s:
didsomething = True
sshutil.initialize_ca()
if cmdset.p:
install_tftp_content()
if cmdset.l:
local_node_trust_setup()
if cmdset.k:
with open('/etc/ssh/ssh_known_hosts', 'a+b') as skh:
for cafile in glob.glob('/var/lib/confluent/public/site/ssh/*.ca'):
cacert = open(cafile, 'rb').read()
cacert = b'@cert-authority * ' + cacert
skh.write(cacert)
if cmdset.g:
initialize_genesis()
if not didsomething and (cmdset.k or cmdset.l or cmdset.g or cmdset.p):
if cmdset.g:
updateboot('genesis-x86_64')
sys.exit(0)
if not didsomething:
sys.stderr.write('Nothing was done, use initialize -i for '
'interactive mode, or see initialize -h for more options\n')
sys.exit(1)
tmpname = '/var/lib/confluent/public/site/initramfs.cpio.'
for x in bytearray(os.urandom(22)):
tmpname += fnamechars[x >> 2]
topack = []
opath = os.getcwd()
os.chdir('/var/lib/confluent/public/site')
topack.append('ssh/')
for currd, _, files in os.walk('ssh'):
for fname in files:
topack.append(os.path.join(currd, fname))
topack.append('tls/')
for currd, _, files in os.walk('tls'):
for fname in files:
topack.append(os.path.join(currd, fname))
with open(tmpname, 'wb') as initramfs:
packit = subprocess.Popen(['cpio', '-H', 'newc', '-o'],
stdout=initramfs, stdin=subprocess.PIPE)
for packfile in topack:
if not isinstance(packfile, bytes):
packfile = packfile.encode('utf8')
packit.stdin.write(packfile)
packit.stdin.write(b'\n')
packit.stdin.close()
res = packit.wait()
if res:
sys.stderr.write('Error occurred while packing site initramfs')
sys.exit(1)
os.rename(tmpname, '/var/lib/confluent/public/site/initramfs.cpio')
if cmdset.g:
updateboot('genesis-x86_64')
tmptarname = tmpname.replace('cpio', 'tgz')
tarcmd = ['tar', '-czf', tmptarname, 'ssh', 'tls']
subprocess.check_call(tarcmd)
os.chdir(opath)
os.rename(tmptarname, '/var/lib/confluent/public/site/initramfs.tgz')
print('Site initramfs content packed successfully')
if not os.path.exists('/etc/confluent/srvcert.pem'):
subprocess.check_call(['collective', 'gencert'])
# TODO: check selinux and segetbool for httpd_can_network_connect
# httpd available and enabled?
def updateboot(profilename):
c = client.Command()
for rsp in c.update('/deployment/profiles/{0}'.format(profilename),
{'updateboot': 1}):
if 'updated' in rsp:
print('Updated: {0}'.format(rsp['updated']))
else:
print(repr(rsp))
def osimport(imagefile):
c = client.Command()
imagefile = os.path.abspath(imagefile)
importing = False
shortname = None
for rsp in c.create('/deployment/importing/', {'filename': imagefile}):
if 'target' in rsp:
importing = True
shortname = rsp['name']
print('Importing from {0} to {1}'.format(imagefile, rsp['target']))
else:
print(repr(rsp))
while importing:
for rsp in c.read('/deployment/importing/{0}'.format(shortname)):
if 'progress' in rsp:
sys.stdout.write('{0}: {1:.2f}% \r'.format(rsp['phase'],
rsp['progress']))
if rsp['phase'] == 'complete':
importing = False
sys.stdout.write('\n')
for profile in rsp['profiles']:
print('Deployment profile created: {0}'.format(profile))
sys.stdout.flush()
else:
print(repr(rsp))
time.sleep(0.5)
if shortname:
list(c.delete('/deployment/importing/{0}'.format(shortname)))
if __name__ == '__main__':
main(sys.argv)