2
0
mirror of https://github.com/xcat2/confluent.git synced 2024-11-22 17:43:14 +00:00
confluent/confluent_server/bin/osdeploy
Jarrod Johnson b809514ef9 Fix osdeploy initialize dependency on master key
Make sure confluent has made /etc/confluent, and further always initialize the
encryption key, as it will almost certainly
be needed and easiest to just always
generate on first startup.
2022-02-08 16:40:41 -05:00

451 lines
19 KiB
Python

#!/usr/bin/python2
__author__ = 'jjohnson2,bfinley'
import argparse
import glob
import os
import os.path
import pwd
import shutil
import sys
import time
path = os.path.dirname(os.path.realpath(__file__))
path = os.path.realpath(os.path.join(path, '..', 'lib', 'python'))
if path.startswith('/opt'):
sys.path.append(path)
import confluent.collective.manager as collective
import eventlet.green.subprocess as subprocess
import confluent.selfservice as selfservice
import confluent.util as util
import confluent.client as client
import confluent.sshutil as sshutil
import confluent.certutil as certutil
try:
input = raw_input
except NameError:
pass
def emprint(txt):
if sys.stdout.isatty():
print('\x1b[1m\x1b[4m' + txt + '\x1b[0m')
else:
print(txt)
fnamechars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.^'
def main(args):
ap = argparse.ArgumentParser(description='Manage OS deployment resources')
sp = ap.add_subparsers(dest='command')
wiz = sp.add_parser('initialize', help='Do OS deployment preparation')
wiz.add_argument('-a', help='Initialize SSH access by confluent to nodes for automation such as ansible playbook execution or syncfiles', action='store_true')
wiz.add_argument('-g', help='Initialize a Genesis profile to boot systems into a rescue or staging environment', action='store_true')
wiz.add_argument('-u', help='Pull in root user key for node deployment', action='store_true')
wiz.add_argument('-s', help='Set up SSH CA for managing node to node ssh and known hosts', action='store_true')
wiz.add_argument('-k', help='Update local global known hosts file with confluent CA', action='store_true')
wiz.add_argument('-t', help='Generate new TLS key for HTTPS operation and register with confluent repository', action='store_true')
wiz.add_argument('-p', help='Copy in TFTP contents required for PXE support', action='store_true')
wiz.add_argument('-i', help='Interactively prompt for behaviors', action='store_true')
wiz.add_argument('-l', help='Set up local management node to allow login from managed nodes', action='store_true')
osip = sp.add_parser('import', help='Import an OS image from an ISO image')
osip.add_argument('imagefile', help='File to use for source of importing')
upb = sp.add_parser(
'updateboot',
help='Push profile.yaml of the named profile data into boot assets as appropriate')
upb.add_argument('profile', help='Profile to update boot assets')
osls = sp.add_parser('list', help='List OS images available for deployment')
cmdset = ap.parse_args()
if cmdset.command == 'list':
return oslist()
if cmdset.command == 'import':
return osimport(cmdset.imagefile)
if cmdset.command == 'initialize':
return initialize(cmdset)
if cmdset.command == 'updateboot':
return updateboot(cmdset.profile)
ap.print_help()
def initialize_genesis():
if not os.path.exists('/opt/confluent/genesis/x86_64/boot/kernel'):
emprint('Install the confluent-genesis package to have the '
'resources for a genesis profile')
return 1
hasconfluentuser = None
try:
hasconfluentuser = pwd.getpwnam('confluent')
except KeyError:
pass
pid = os.fork()
if pid:
retval = os.waitpid(pid, 0)
return retval[1]
retcode = 0
try:
if hasconfluentuser:
os.setgid(hasconfluentuser.pw_gid)
os.setuid(hasconfluentuser.pw_uid)
os.umask(0o22)
os.makedirs('/var/lib/confluent/public/os/genesis-x86_64/boot/efi/boot', 0o755)
os.makedirs('/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs', 0o755)
os.symlink('/opt/confluent/genesis/x86_64/boot/efi/boot/BOOTX64.EFI',
'/var/lib/confluent/public/os/genesis-x86_64/boot/efi/boot/BOOTX64.EFI')
os.symlink('/opt/confluent/genesis/x86_64/boot/efi/boot/grubx64.efi',
'/var/lib/confluent/public/os/genesis-x86_64/boot/efi/boot/grubx64.efi')
os.symlink('/opt/confluent/genesis/x86_64/boot/initramfs/distribution',
'/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs/distribution')
os.symlink('/var/lib/confluent/public/site/initramfs.cpio',
'/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs/site.cpio')
os.symlink('/opt/confluent/lib/osdeploy/genesis/initramfs/addons.cpio',
'/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs/addons.cpio')
os.symlink('/opt/confluent/genesis/x86_64/boot/kernel',
'/var/lib/confluent/public/os/genesis-x86_64/boot/kernel')
shutil.copytree('/opt/confluent/lib/osdeploy/genesis/profiles/default/ansible/',
'/var/lib/confluent/public/os/genesis-x86_64/ansible/')
shutil.copytree('/opt/confluent/lib/osdeploy/genesis/profiles/default/scripts/',
'/var/lib/confluent/public/os/genesis-x86_64/scripts/')
shutil.copyfile('/opt/confluent/lib/osdeploy/genesis/profiles/default/profile.yaml',
'/var/lib/confluent/public/os/genesis-x86_64/profile.yaml')
except Exception as e:
sys.stderr.write(str(e) + '\n')
retcode = 1
finally:
os._exit(retcode)
mynamedone = False
def init_confluent_myname():
global mynamedone
if mynamedone:
return
mynamedone = True
hasconfluentuser = None
neededuid = os.stat('/etc/confluent').st_uid
if neededuid == 0:
return
try:
hasconfluentuser = pwd.getpwnam('confluent')
except KeyError:
pass
if hasconfluentuser:
print("Ok")
pid = os.fork()
if pid:
os.waitpid(pid, 0)
else:
os.setgid(hasconfluentuser.pw_gid)
os.setuid(hasconfluentuser.pw_uid)
collective.get_myname()
os._exit(0)
def local_node_trust_setup():
init_confluent_myname()
allnodes, domain = selfservice.get_cluster_list()
myname = collective.get_myname()
myprincipals = set([myname])
restorecon = os.path.exists('/usr/sbin/restorecon')
neededlines = set([
'HostbasedAuthentication yes', 'HostbasedUsesNameFromPacketOnly yes',
'IgnoreRhosts no'])
if domain and not myname.endswith(domain):
myprincipals.add('{0}.{1}'.format(myname, domain))
if domain and '.' in myname and myname.endswith(domain):
myprincipals.add(myname.split('.')[0])
for pubkey in glob.glob('/etc/ssh/ssh_host_*_key.pub'):
currpubkey = open(pubkey, 'rb').read()
cert = sshutil.sign_host_key(currpubkey, myname, myprincipals)
certfile = pubkey.replace('key.pub', 'key-cert.pub')
neededlines.add('HostCertificate {0}'.format(certfile))
if os.path.exists(certfile):
os.unlink(certfile)
with open(certfile, 'w') as certout:
certout.write(cert)
if restorecon:
subprocess.check_call(['/usr/sbin/restorecon', certfile])
with open('/etc/ssh/sshd_config', 'r') as sshconf:
currconfig = sshconf.read().split('\n')
for conline in currconfig:
conline = conline.strip()
neededlines.discard(conline)
if neededlines:
with open('/etc/ssh/sshd_config', 'a') as cfgout:
for currline in neededlines:
cfgout.write(currline)
cfgout.write('\n')
with open('/etc/ssh/shosts.equiv', 'w') as equivout:
for node in util.natural_sort(allnodes):
equivout.write(node + '\n')
with open('/root/.shosts', 'w') as equivout:
for node in util.natural_sort(allnodes):
equivout.write(node + '\n')
if restorecon:
subprocess.check_call(
['/usr/sbin/restorecon',
'/etc/ssh/shosts.equiv', '/root/.shosts'])
def install_tftp_content():
tftplocation = None
candidates = ('/tftpboot', '/var/lib/tftpboot', '/srv/tftpboot', '/srv/tftp')
for cand in candidates:
if os.path.isdir(cand):
tftplocation = cand
break
if not tftplocation:
emprint('Unable to detect a directory for tftp content (check that tftp server is installed)')
return 1
if os.path.exists('/usr/lib/systemd/system/tftp.socket'):
if tftplocation == '/tftpboot':
emprint('/tftpboot is detected as tftp directory, will not try to automatically enable tftp, as it is presumed to be externally managed')
else:
try:
subprocess.check_call(['systemctl', 'enable', 'tftp.socket', '--now'])
print('TFTP service is enabled and running')
except Exception:
emprint('Unable to automatically enable and start tftp.socket, tftp server may already be running outside of systemd control')
else:
emprint(
'Detected {0} as tftp directory, but unable to determine tftp service, ensure that a tftp server is installed and enabled manually'.format(tftplocation))
tftplocation = '{0}/confluent/x86_64'.format(tftplocation)
try:
os.makedirs(tftplocation)
except OSError as e:
if e.errno != 17:
raise
shutil.copy('/opt/confluent/lib/ipxe/ipxe.efi', tftplocation)
shutil.copy('/opt/confluent/lib/ipxe/ipxe.kkpxe', tftplocation)
def initialize(cmdset):
if os.getuid() != 0:
sys.stderr.write('This command must run as root user\n')
sys.exit(1)
if cmdset.i:
didsomething = True
sys.stdout.write('Add root user key to be authorized to log into nodes (-u)? (y/n): ')
sys.stdout.flush()
cmdset.u = input().strip().lower().startswith('y')
sys.stdout.write('Initialize a profile to boot Genesis on target systems (a small Linux environment for rescue and staging use)? (y/n): ')
cmdset.g = input().strip().lower().startswith('y')
sys.stdout.write('Set up an SSH authority to help manage known_hosts and node to node ssh for all users (-s)? (y/n): ')
cmdset.s = input().strip().lower().startswith('y')
sys.stdout.write('Update global known hosts on this server to trust local CA certificates (-k)? (y/n): ')
cmdset.k = input().strip().lower().startswith('y')
sys.stdout.write('Allow managed nodes to ssh to this management node without a password (-l)? (y/n): ')
cmdset.l = input().strip().lower().startswith('y')
sys.stdout.write('Update tftp directory with binaries to support PXE (-p) (y/n): ')
cmdset.p = input().strip().lower().startswith('y')
sys.stdout.write('Initialize confluent ssh user key so confluent can execute remote automation (e.g. Ansible plays) (-a) (y/n): ')
cmdset.a = input().strip().lower().startswith('y')
sys.stdout.write('Generate new TLS certificates for HTTP, replacing any existing certificate (-t)? (y/n): ')
cmdset.t = input().strip().lower().startswith('y')
if not cmdset.t:
print(
'In order to use your own certificate authority, make sure '
'to put the certificate authority into '
'/var/lib/confluent/public/site/tls/ directory as a .pem file '
'as well as named (hash).0 where (hash) is the hash of the '
'subject.')
else:
didsomething = False
if not os.path.exists('/etc/confluent'):
sys.stderr.write('Start confluent service prior to initializng OS deployment\n')
sys.exit(1)
if cmdset.t or cmdset.s or cmdset.a or cmdset.u:
neededuid = os.stat('/etc/confluent').st_uid
try:
os.makedirs('/var/lib/confluent')
os.chown('/var/lib/confluent', neededuid, -1)
except OSError as e:
if e.errno != 17:
raise
if cmdset.u:
didsomething = True
if not glob.glob('/root/.ssh/*.pub'):
sys.stderr.write('No user keys for root detected, it is recommended '
'to run ssh-keygen -t ed25519 to generate a user '
'key. For optimal security, a passphrase should be '
'used. ssh-agent may be used to make use of a '
'passphrase protected ssh key easier.\n')
sys.exit(1)
init_confluent_myname()
sshutil.initialize_root_key(False)
if cmdset.t:
didsomething = True
init_confluent_myname()
certutil.create_certificate()
if os.path.exists('/usr/lib/systemd/system/httpd.service'):
subprocess.check_call(['systemctl', 'try-restart', 'httpd'])
print('HTTP server has been restarted if it was running')
elif os.path.exists('/usr/lib/systemd/system/apache2.service'):
subprocess.check_call(['systemctl', 'try-restart', 'apache2'])
print('HTTP server has been restarted if it was running')
else:
emprint('New HTTPS certificates generated, restart the web server manually')
if cmdset.s:
didsomething = True
init_confluent_myname()
sshutil.initialize_ca()
if cmdset.a:
didsomething = True
init_confluent_myname()
sshutil.initialize_root_key(True, True)
if cmdset.p:
install_tftp_content()
if cmdset.l:
local_node_trust_setup()
if cmdset.k:
with open('/etc/ssh/ssh_known_hosts', 'a+b') as skh:
for cafile in glob.glob('/var/lib/confluent/public/site/ssh/*.ca'):
cacert = open(cafile, 'rb').read()
cacert = b'@cert-authority * ' + cacert
skh.write(cacert)
if cmdset.g:
rc = initialize_genesis()
if rc != 0:
sys.exit(rc)
if not didsomething and (cmdset.k or cmdset.l or cmdset.g or cmdset.p):
if cmdset.g:
updateboot('genesis-x86_64')
sys.exit(0)
if not didsomething:
sys.stderr.write('Nothing was done, use initialize -i for '
'interactive mode, or see initialize -h for more options\n')
sys.exit(1)
tmpname = '/var/lib/confluent/public/site/initramfs.cpio.'
for x in bytearray(os.urandom(22)):
tmpname += fnamechars[x >> 2]
topack = []
opath = os.getcwd()
os.chdir('/var/lib/confluent/public/site')
totar = []
if not os.path.exists('confluent_uuid'):
c = client.Command()
for rsp in c.read('/uuid'):
uuid = rsp.get('uuid', {}).get('value', None)
if uuid:
with open('confluent_uuid', 'w') as uuidout:
uuidout.write(uuid)
uuidout.write('\n')
totar.append('confluent_uuid')
topack.append('confluent_uuid')
if os.path.exists('ssh'):
totar.append('ssh')
topack.append('ssh/')
for currd, _, files in os.walk('ssh'):
for fname in files:
topack.append(os.path.join(currd, fname))
if os.path.exists('tls'):
totar.append('tls')
topack.append('tls/')
for currd, _, files in os.walk('tls'):
for fname in files:
topack.append(os.path.join(currd, fname))
with open(tmpname, 'wb') as initramfs:
packit = subprocess.Popen(['cpio', '-H', 'newc', '-o'],
stdout=initramfs, stdin=subprocess.PIPE)
for packfile in topack:
if not isinstance(packfile, bytes):
packfile = packfile.encode('utf8')
packit.stdin.write(packfile)
packit.stdin.write(b'\n')
packit.stdin.close()
res = packit.wait()
if res:
sys.stderr.write('Error occurred while packing site initramfs')
sys.exit(1)
os.rename(tmpname, '/var/lib/confluent/public/site/initramfs.cpio')
if cmdset.g:
updateboot('genesis-x86_64')
if totar:
tmptarname = tmpname.replace('cpio', 'tgz')
tarcmd = ['tar', '-czf', tmptarname] + totar
subprocess.check_call(tarcmd)
os.rename(tmptarname, '/var/lib/confluent/public/site/initramfs.tgz')
os.chdir(opath)
print('Site initramfs content packed successfully')
if not os.path.exists('/etc/confluent/srvcert.pem'):
subprocess.check_call(['collective', 'gencert'])
# TODO: check selinux and segetbool for httpd_can_network_connect
# httpd available and enabled?
def updateboot(profilename):
c = client.Command()
for rsp in c.update('/deployment/profiles/{0}'.format(profilename),
{'updateboot': 1}):
if 'updated' in rsp:
print('Updated: {0}'.format(rsp['updated']))
else:
print(repr(rsp))
def oslist():
c = client.Command()
print("Distributions:")
for rsp in c.read('/deployment/distributions'):
if 'error' in rsp:
sys.stderr.write(rsp['error'] + '\n')
exitcode = 1
else:
print(" " + rsp['item']['href'].replace('/', ''))
print("")
print("Profiles:")
for rsp in c.read('/deployment/profiles'):
if 'error' in rsp:
sys.stderr.write(rsp['error'] + '\n')
exitcode = 1
else:
print(" " + rsp['item']['href'].replace('/', ''))
print("")
def osimport(imagefile):
c = client.Command()
imagefile = os.path.abspath(imagefile)
if c.unixdomain:
ofile = open(imagefile, 'rb')
try:
c.add_file(imagefile, ofile.fileno(), 'rb')
except Exception:
pass
importing = False
shortname = None
for rsp in c.create('/deployment/importing/', {'filename': imagefile}):
if 'target' in rsp:
importing = True
shortname = rsp['name']
print('Importing from {0} to {1}'.format(imagefile, rsp['target']))
else:
print(repr(rsp))
try:
while importing:
for rsp in c.read('/deployment/importing/{0}'.format(shortname)):
if 'progress' in rsp:
sys.stdout.write('{0}: {1:.2f}% \r'.format(rsp['phase'],
rsp['progress']))
if rsp['phase'] == 'complete':
importing = False
sys.stdout.write('\n')
for profile in rsp['profiles']:
print('Deployment profile created: {0}'.format(profile))
if rsp['phase'] == 'error':
sys.stderr.write('{0}\n'.format(rsp['error']))
if 'Permission denied' in rsp['error']:
sys.stderr.write('Confluent service unable to write to destination, check that confluent user has access to target\n')
return
sys.stdout.flush()
else:
print(repr(rsp))
time.sleep(0.5)
finally:
if shortname:
list(c.delete('/deployment/importing/{0}'.format(shortname)))
if __name__ == '__main__':
main(sys.argv)