2
0
mirror of https://github.com/xcat2/confluent.git synced 2025-01-13 11:17:49 +00:00
Jarrod Johnson 2073926256 Improve ctrl-c and other behaviors of osdeploy import
More reliably delete an import attempt to avoid odd behaviors.
2021-05-27 16:10:06 -04:00

385 lines
17 KiB
Python

#!/usr/bin/python2
__author__ = 'jjohnson2,bfinley'
import argparse
import glob
import os
import os.path
import pwd
import shutil
import sys
import time
path = os.path.dirname(os.path.realpath(__file__))
path = os.path.realpath(os.path.join(path, '..', 'lib', 'python'))
if path.startswith('/opt'):
sys.path.append(path)
import confluent.collective.manager as collective
import eventlet.green.subprocess as subprocess
import confluent.selfservice as selfservice
import confluent.util as util
import confluent.client as client
import confluent.sshutil as sshutil
import confluent.certutil as certutil
try:
input = raw_input
except NameError:
pass
def emprint(txt):
if sys.stdout.isatty():
print('\x1b[1m\x1b[4m' + txt + '\x1b[0m')
else:
print(txt)
fnamechars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.^'
def main(args):
ap = argparse.ArgumentParser(description='Manage OS deployment resources')
sp = ap.add_subparsers(dest='command')
wiz = sp.add_parser('initialize', help='Do OS deployment preparation')
wiz.add_argument('-a', help='Initialize SSH access by confluent to nodes for automation such as ansible playbook execution or syncfiles', action='store_true')
wiz.add_argument('-g', help='Initialize a Genesis profile to boot systems into a rescue or staging environment', action='store_true')
wiz.add_argument('-u', help='Pull in root user key for node deployment', action='store_true')
wiz.add_argument('-s', help='Set up SSH CA for managing node to node ssh and known hosts', action='store_true')
wiz.add_argument('-k', help='Update local global known hosts file with confluent CA', action='store_true')
wiz.add_argument('-t', help='Generate new TLS key for HTTPS operation and register with confluent repository', action='store_true')
wiz.add_argument('-p', help='Copy in TFTP contents required for PXE support', action='store_true')
wiz.add_argument('-i', help='Interactively prompt for behaviors', action='store_true')
wiz.add_argument('-l', help='Set up local management node to allow login from managed nodes', action='store_true')
osip = sp.add_parser('import', help='Import an OS image from an ISO image')
osip.add_argument('imagefile', help='File to use for source of importing')
upb = sp.add_parser(
'updateboot',
help='Push profile.yaml of the named profile data into boot assets as appropriate')
upb.add_argument('profile', help='Profile to update boot assets')
osls = sp.add_parser('list', help='List OS images available for deployment')
cmdset = ap.parse_args()
if cmdset.command == 'list':
return oslist()
if cmdset.command == 'import':
return osimport(cmdset.imagefile)
if cmdset.command == 'initialize':
return initialize(cmdset)
if cmdset.command == 'updateboot':
return updateboot(cmdset.profile)
ap.print_help()
def initialize_genesis():
if not os.path.exists('/opt/confluent/genesis/x86_64/boot/kernel'):
emprint('Install the confluent-genesis package to have the '
'resources for a genesis profile')
return 1
hasconfluentuser = None
try:
hasconfluentuser = pwd.getpwnam('confluent')
except KeyError:
pass
pid = os.fork()
if pid:
retval = os.waitpid(pid, 0)
return retval[1]
retcode = 0
try:
if hasconfluentuser:
os.setgid(hasconfluentuser.pw_gid)
os.setuid(hasconfluentuser.pw_uid)
os.umask(0o22)
os.makedirs('/var/lib/confluent/public/os/genesis-x86_64/boot/efi/boot', 0o755)
os.makedirs('/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs', 0o755)
os.symlink('/opt/confluent/genesis/x86_64/boot/efi/boot/BOOTX64.EFI',
'/var/lib/confluent/public/os/genesis-x86_64/boot/efi/boot/BOOTX64.EFI')
os.symlink('/opt/confluent/genesis/x86_64/boot/efi/boot/grubx64.efi',
'/var/lib/confluent/public/os/genesis-x86_64/boot/efi/boot/grubx64.efi')
os.symlink('/opt/confluent/genesis/x86_64/boot/initramfs/distribution',
'/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs/distribution')
os.symlink('/var/lib/confluent/public/site/initramfs.cpio',
'/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs/site.cpio')
os.symlink('/opt/confluent/lib/osdeploy/genesis/initramfs/addons.cpio',
'/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs/addons.cpio')
os.symlink('/opt/confluent/genesis/x86_64/boot/kernel',
'/var/lib/confluent/public/os/genesis-x86_64/boot/kernel')
shutil.copytree('/opt/confluent/lib/osdeploy/genesis/profiles/default/ansible/',
'/var/lib/confluent/public/os/genesis-x86_64/ansible/')
shutil.copytree('/opt/confluent/lib/osdeploy/genesis/profiles/default/scripts/',
'/var/lib/confluent/public/os/genesis-x86_64/scripts/')
shutil.copyfile('/opt/confluent/lib/osdeploy/genesis/profiles/default/profile.yaml',
'/var/lib/confluent/public/os/genesis-x86_64/profile.yaml')
except Exception:
retcode = 1
finally:
os._exit(retcode)
def local_node_trust_setup():
allnodes, domain = selfservice.get_cluster_list()
myname = collective.get_myname()
myprincipals = set([myname])
restorecon = os.path.exists('/usr/sbin/restorecon')
neededlines = set([
'HostbasedAuthentication yes', 'HostbasedUsesNameFromPacketOnly yes',
'IgnoreRhosts no'])
if domain and not myname.endswith(domain):
myprincipals.add('{0}.{1}'.format(myname, domain))
if domain and '.' in myname and myname.endswith(domain):
myprincipals.add(myname.split('.')[0])
for pubkey in glob.glob('/etc/ssh/ssh_host_*_key.pub'):
currpubkey = open(pubkey, 'rb').read()
cert = sshutil.sign_host_key(currpubkey, myname, myprincipals)
certfile = pubkey.replace('key.pub', 'key-cert.pub')
neededlines.add('HostCertificate {0}'.format(certfile))
if os.path.exists(certfile):
os.unlink(certfile)
with open(certfile, 'w') as certout:
certout.write(cert)
if restorecon:
subprocess.check_call(['/usr/sbin/restorecon', certfile])
with open('/etc/ssh/sshd_config', 'r') as sshconf:
currconfig = sshconf.read().split('\n')
for conline in currconfig:
conline = conline.strip()
neededlines.discard(conline)
if neededlines:
with open('/etc/ssh/sshd_config', 'a') as cfgout:
for currline in neededlines:
cfgout.write(currline)
cfgout.write('\n')
with open('/etc/ssh/shosts.equiv', 'w') as equivout:
for node in util.natural_sort(allnodes):
equivout.write(node + '\n')
with open('/root/.shosts', 'w') as equivout:
for node in util.natural_sort(allnodes):
equivout.write(node + '\n')
if restorecon:
subprocess.check_call(
['/usr/sbin/restorecon',
'/etc/ssh/shosts.equiv', '/root/.shosts'])
def install_tftp_content():
tftplocation = None
candidates = ('/tftpboot', '/var/lib/tftpboot', '/srv/tftpboot', '/srv/tftp')
for cand in candidates:
if os.path.isdir(cand):
tftplocation = cand
break
if not tftplocation:
emprint('Unable to detect a directory for tftp content (check that tftp server is installed)')
return 1
if os.path.exists('/usr/lib/systemd/system/tftp.socket'):
if tftplocation == '/tftpboot':
emprint('/tftpboot is detected as tftp directory, will not try to automatically enable tftp, as it is presumed to be externally managed')
else:
try:
subprocess.check_call(['systemctl', 'enable', 'tftp.socket', '--now'])
print('TFTP service is enabled and running')
except Exception:
emprint('Unable to automatically enable and start tftp.socket, tftp server may already be running outside of systemd control')
else:
emprint(
'Detected {0} as tftp directory, but unable to determine tftp service, ensure that a tftp server is installed and enabled manually'.format(tftplocation))
tftplocation = '{0}/confluent/x86_64'.format(tftplocation)
try:
os.makedirs(tftplocation)
except OSError as e:
if e.errno != 17:
raise
shutil.copy('/opt/confluent/lib/ipxe/ipxe.efi', tftplocation)
shutil.copy('/opt/confluent/lib/ipxe/ipxe.kkpxe', tftplocation)
def initialize(cmdset):
if os.getuid() != 0:
sys.stderr.write('This command must run as root user\n')
sys.exit(1)
if cmdset.i:
didsomething = True
sys.stdout.write('Add root user key to be authorized to log into nodes (-u)? (y/n): ')
sys.stdout.flush()
cmdset.u = input().strip().lower().startswith('y')
sys.stdout.write('Initialize a profile to boot Genesis on target systems (a small Linux environment for rescue and staging use)? (y/n): ')
cmdset.g = input().strip().lower().startswith('y')
sys.stdout.write('Set up an SSH authority to help manage known_hosts and node to node ssh for all users (-s)? (y/n): ')
cmdset.s = input().strip().lower().startswith('y')
sys.stdout.write('Update global known hosts on this server to trust local CA certificates (-k)? (y/n): ')
cmdset.k = input().strip().lower().startswith('y')
sys.stdout.write('Allow managed nodes to ssh to this management node without a password (-l)? (y/n): ')
cmdset.l = input().strip().lower().startswith('y')
sys.stdout.write('Update tftp directory with binaries to support PXE (-p) (y/n): ')
cmdset.p = input().strip().lower().startswith('y')
sys.stdout.write('Initialize confluent ssh user key so confluent can execute remote automation (e.g. Ansible plays) (-a) (y/n): ')
cmdset.a = input().strip().lower().startswith('y')
sys.stdout.write('Generate new TLS certificates for HTTP, replacing any existing certificate (-t)? (y/n): ')
cmdset.t = input().strip().lower().startswith('y')
if not cmdset.t:
print(
'In order to use your own certificate authority, make sure '
'to put the certificate authority into '
'/var/lib/confluent/public/site/tls/ directory as a .pem file '
'as well as named (hash).0 where (hash) is the hash of the '
'subject.')
else:
didsomething = False
if cmdset.u:
didsomething = True
if not glob.glob('/root/.ssh/*.pub'):
sys.stderr.write('No user keys for root detected, it is recommended '
'to run ssh-keygen -t ed25519 to generate a user '
'key. For optimal security, a passphrase should be '
'used. ssh-agent may be used to make use of a '
'passphrase protected ssh key easier.\n')
sys.exit(1)
sshutil.initialize_root_key(False)
if cmdset.t:
didsomething = True
certutil.create_certificate()
if os.path.exists('/usr/lib/systemd/system/httpd.service'):
subprocess.check_call(['systemctl', 'try-restart', 'httpd'])
print('HTTP server has been restarted if it was running')
elif os.path.exists('/usr/lib/systemd/system/apache2.service'):
subprocess.check_call(['systemctl', 'try-restart', 'apache2'])
print('HTTP server has been restarted if it was running')
else:
emprint('New HTTPS certificates generated, restart the web server manually')
if cmdset.s:
didsomething = True
sshutil.initialize_ca()
if cmdset.a:
didsomething = True
sshutil.initialize_root_key(True, True)
if cmdset.p:
install_tftp_content()
if cmdset.l:
local_node_trust_setup()
if cmdset.k:
with open('/etc/ssh/ssh_known_hosts', 'a+b') as skh:
for cafile in glob.glob('/var/lib/confluent/public/site/ssh/*.ca'):
cacert = open(cafile, 'rb').read()
cacert = b'@cert-authority * ' + cacert
skh.write(cacert)
if cmdset.g:
initialize_genesis()
if not didsomething and (cmdset.k or cmdset.l or cmdset.g or cmdset.p):
if cmdset.g:
updateboot('genesis-x86_64')
sys.exit(0)
if not didsomething:
sys.stderr.write('Nothing was done, use initialize -i for '
'interactive mode, or see initialize -h for more options\n')
sys.exit(1)
tmpname = '/var/lib/confluent/public/site/initramfs.cpio.'
for x in bytearray(os.urandom(22)):
tmpname += fnamechars[x >> 2]
topack = []
opath = os.getcwd()
os.chdir('/var/lib/confluent/public/site')
topack.append('ssh/')
for currd, _, files in os.walk('ssh'):
for fname in files:
topack.append(os.path.join(currd, fname))
topack.append('tls/')
for currd, _, files in os.walk('tls'):
for fname in files:
topack.append(os.path.join(currd, fname))
with open(tmpname, 'wb') as initramfs:
packit = subprocess.Popen(['cpio', '-H', 'newc', '-o'],
stdout=initramfs, stdin=subprocess.PIPE)
for packfile in topack:
if not isinstance(packfile, bytes):
packfile = packfile.encode('utf8')
packit.stdin.write(packfile)
packit.stdin.write(b'\n')
packit.stdin.close()
res = packit.wait()
if res:
sys.stderr.write('Error occurred while packing site initramfs')
sys.exit(1)
os.rename(tmpname, '/var/lib/confluent/public/site/initramfs.cpio')
if cmdset.g:
updateboot('genesis-x86_64')
tmptarname = tmpname.replace('cpio', 'tgz')
tarcmd = ['tar', '-czf', tmptarname, 'ssh', 'tls']
subprocess.check_call(tarcmd)
os.chdir(opath)
os.rename(tmptarname, '/var/lib/confluent/public/site/initramfs.tgz')
print('Site initramfs content packed successfully')
if not os.path.exists('/etc/confluent/srvcert.pem'):
subprocess.check_call(['collective', 'gencert'])
# TODO: check selinux and segetbool for httpd_can_network_connect
# httpd available and enabled?
def updateboot(profilename):
c = client.Command()
for rsp in c.update('/deployment/profiles/{0}'.format(profilename),
{'updateboot': 1}):
if 'updated' in rsp:
print('Updated: {0}'.format(rsp['updated']))
else:
print(repr(rsp))
def oslist():
c = client.Command()
print("Distributions:")
for rsp in c.read('/deployment/distributions'):
if 'error' in rsp:
sys.stderr.write(res['error'] + '\n')
exitcode = 1
else:
print(" " + rsp['item']['href'].replace('/', ''))
print("")
print("Profiles:")
for rsp in c.read('/deployment/profiles'):
if 'error' in rsp:
sys.stderr.write(res['error'] + '\n')
exitcode = 1
else:
print(" " + rsp['item']['href'].replace('/', ''))
print("")
def osimport(imagefile):
c = client.Command()
imagefile = os.path.abspath(imagefile)
if c.unixdomain:
ofile = open(imagefile, 'rb')
try:
c.add_file(imagefile, ofile.fileno(), 'rb')
except Exception:
pass
importing = False
shortname = None
for rsp in c.create('/deployment/importing/', {'filename': imagefile}):
if 'target' in rsp:
importing = True
shortname = rsp['name']
print('Importing from {0} to {1}'.format(imagefile, rsp['target']))
else:
print(repr(rsp))
try:
while importing:
for rsp in c.read('/deployment/importing/{0}'.format(shortname)):
if 'progress' in rsp:
sys.stdout.write('{0}: {1:.2f}% \r'.format(rsp['phase'],
rsp['progress']))
if rsp['phase'] == 'complete':
importing = False
sys.stdout.write('\n')
for profile in rsp['profiles']:
print('Deployment profile created: {0}'.format(profile))
sys.stdout.flush()
else:
print(repr(rsp))
time.sleep(0.5)
finally:
if shortname:
list(c.delete('/deployment/importing/{0}'.format(shortname)))
if __name__ == '__main__':
main(sys.argv)