From 6ee9f96e08c931e3b6dac55da622fd11edf61b8f Mon Sep 17 00:00:00 2001 From: Simon Thompson Date: Thu, 6 Jul 2023 09:34:38 +0200 Subject: [PATCH 001/126] fix ansible support when multi stage plays are in playbook --- confluent_server/confluent/runansible.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/runansible.py b/confluent_server/confluent/runansible.py index 299dcb00..cbbecc58 100644 --- a/confluent_server/confluent/runansible.py +++ b/confluent_server/confluent/runansible.py @@ -173,9 +173,11 @@ if __name__ == '__main__': os.chdir(os.path.dirname(sys.argv[2])) if isinstance(plays, dict): plays = [plays] - taskman = TaskQueueManager(inventory=invman, loader=loader, passwords={}, - variable_manager=varman, stdout_callback=ResultsCollector()) + for currplay in plays: + taskman = TaskQueueManager(inventory=invman, loader=loader, passwords={}, + variable_manager=varman, stdout_callback=ResultsCollector()) + currplay['hosts'] = sys.argv[1] if 'become' in currplay and 'become_user' not in currplay: del currplay['become'] From 75add230b6a701054cb8fe848d300c165a5aaf4e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 9 Aug 2023 11:13:09 -0400 Subject: [PATCH 002/126] Skip API key init on checkonly runs Checkonly does not require API key, and makes apiclient more dependent on material that is unlikely to exist early in a deployment. 
--- .../common/initramfs/opt/confluent/bin/apiclient | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient index fefc07b4..b64052c9 100644 --- a/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient +++ b/confluent_osdeploy/common/initramfs/opt/confluent/bin/apiclient @@ -227,7 +227,7 @@ def get_apikey(nodename, hosts, errout=None): return apikey class HTTPSClient(client.HTTPConnection, object): - def __init__(self, usejson=False, port=443, host=None, errout=None, phmac=None): + def __init__(self, usejson=False, port=443, host=None, errout=None, phmac=None, checkonly=False): self.phmac = phmac self.errout = None if errout: @@ -291,7 +291,7 @@ class HTTPSClient(client.HTTPConnection, object): if self.phmac: with open(phmac, 'r') as hmacin: self.stdheaders['CONFLUENT_CRYPTHMAC'] = hmacin.read() - else: + elif not checkonly: self.stdheaders['CONFLUENT_APIKEY'] = get_apikey(node, self.hosts, errout=self.errout) if mgtiface: self.stdheaders['CONFLUENT_MGTIFACE'] = mgtiface @@ -468,7 +468,7 @@ if __name__ == '__main__': outf.write(chunk) chunk = reader.read(16384) sys.exit(0) - client = HTTPSClient(usejson, errout=errout, phmac=phmac) + client = HTTPSClient(usejson, errout=errout, phmac=phmac, checkonly=checkonly) if waitfor: status = 201 while status != waitfor: From 0b7247d3860bf6b1c2756258bf525f000d830c7a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 10 Aug 2023 16:27:42 -0400 Subject: [PATCH 003/126] Fix CentOS 7 routed deployment with identity image Numerous issues prevented the identity image support from actually running, correct them. 
--- .../dracut/hooks/initqueue/01-confluent.sh | 34 ++++++++++++++----- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/confluent_osdeploy/el7/initramfs/usr/lib/dracut/hooks/initqueue/01-confluent.sh b/confluent_osdeploy/el7/initramfs/usr/lib/dracut/hooks/initqueue/01-confluent.sh index 62d73ed3..418163c0 100644 --- a/confluent_osdeploy/el7/initramfs/usr/lib/dracut/hooks/initqueue/01-confluent.sh +++ b/confluent_osdeploy/el7/initramfs/usr/lib/dracut/hooks/initqueue/01-confluent.sh @@ -7,6 +7,7 @@ if [ -f /tmp/dd_disk ]; then fi done fi +shutdownnic="" oum=$(umask) umask 0077 mkdir -p /etc/confluent @@ -26,6 +27,13 @@ if [ -e /dev/disk/by-label/CNFLNT_IDNT ]; then deploysrvs=$(sed -n '/^deploy_servers:/, /^[^-]/p' cnflnt.yml |grep ^-|sed -e 's/^- //'|grep -v :) nodename=$(grep ^nodename: cnflnt.yml|awk '{print $2}') + ln -s /opt/confluent/bin/clortho /opt/confluent/bin/genpasshmac + hmackeyfile=/tmp/hmackeyfile + passfile=/etc/confluent/confluent.apikey + passcrypt=/tmp/passcrypt + hmacfile=/tmp/hmacfile + echo -n $(grep ^apitoken: cnflnt.yml|awk '{print $2}') > $hmackeyfile; + /opt/confluent/bin/genpasshmac $passfile $passcrypt $hmacfile $hmackeyfile echo "NODENAME: "$nodename > /etc/confluent/confluent.info for dsrv in $deploysrvs; do echo 'MANAGER: '$dsrv >> /etc/confluent/confluent.info @@ -38,6 +46,7 @@ if [ -e /dev/disk/by-label/CNFLNT_IDNT ]; then udevadm info $i | grep ID_NET_DRIVER=cdc_ether > /dev/null && continue ip link set $(basename $i) up done + sleep 10 usedhcp=0 for NICGUESS in $(ip link|grep LOWER_UP|grep -v LOOPBACK| awk '{print $2}' | sed -e 's/:$//'); do if [ "$autoconfigmethod" = "dhcp" ]; then @@ -59,15 +68,17 @@ if [ -e /dev/disk/by-label/CNFLNT_IDNT ]; then v4nm=$(grep ipv4_netmask: $tcfg) v4nm=${v4nm#ipv4_netmask: } TESTSRV=$(python /opt/confluent/bin/apiclient -c 2> /dev/null) + if [ ! 
-z "$TESTSRV" ]; then + python /opt/confluent/bin/apiclient -p $hmacfile /confluent-api/self/registerapikey $passcrypt + mgr=$TESTSRV + ifname=$NICGUESS + shutdownnic=$ifname + break + fi if [ ! -z "$v4gw" ]; then ip route del default via $v4gw fi ip -4 addr flush dev $NICGUESS - if [ ! -z "$TESTSRV" ]; then - mgr=$TESTSRV - ifname=$NICGUESS - break - fi fi done fi @@ -87,13 +98,18 @@ elif [ -z "$ifname" ]; then grep ^EXTMGRINFO: /etc/confluent/confluent.info || return 0 # Do absolutely nothing if no data at all yet echo -n "" > /etc/cmdline.d/01-confluent.conf else - echo -n ip=$v4addr::$v4gw:$v4nm:$hostname:$ifname:none > /etc/cmdline.d/01-confluent.conf + echo ip=$v4addr::$v4gw:$v4nm:$hostname:$ifname:none > /etc/cmdline.d/01-confluent.conf +fi +python /opt/confluent/bin/apiclient /confluent-api/self/deploycfg > /etc/confluent/confluent.deploycfg +if [ ! -z "$shutdownnic" ]; then + if [ ! -z "$v4gw" ]; then + ip route del default via $v4gw + fi + ip -4 addr flush dev $shutdownnic fi -echo -n "" > /tmp/confluent.initq # restart cmdline nodename=$(grep ^NODENAME /etc/confluent/confluent.info|awk '{print $2}') -#TODO: blkid --label to find mounted api -python /opt/confluent/bin/apiclient /confluent-api/self/deploycfg > /etc/confluent/confluent.deploycfg +echo -n "" > /tmp/confluent.initq if [ -z "$ifname" ]; then ifidx=$(cat /tmp/confluent.ifidx) ifname=$(ip link |grep ^$ifidx:|awk '{print $2}') From dba6e7f378676023edb67c4db6b91926685ece1d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 11 Aug 2023 13:44:37 -0400 Subject: [PATCH 004/126] Change Ubuntu 20.04 to be consistent to 22.04 This aims to bring identity image deployment fully to ubuntu 20.0.4. 
--- .../hooks/casper-bottom.sh | 28 +++++++++++++++---- .../initramfs/scripts/init-premount/confluent | 2 +- .../initramfs/scripts/init-premount/confluent | 2 +- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/confluent_osdeploy/ubuntu20.04/initramfs/custom-installation/hooks/casper-bottom.sh b/confluent_osdeploy/ubuntu20.04/initramfs/custom-installation/hooks/casper-bottom.sh index 9c067ebc..a08a6b3d 100755 --- a/confluent_osdeploy/ubuntu20.04/initramfs/custom-installation/hooks/casper-bottom.sh +++ b/confluent_osdeploy/ubuntu20.04/initramfs/custom-installation/hooks/casper-bottom.sh @@ -9,19 +9,35 @@ MGR=$(grep ^EXTMGRINFO: /custom-installation/confluent/confluent.info |awk -F'|' MGR=$(grep ^MANAGER: /custom-installation/confluent/confluent.info|head -n 1| awk '{print $2}') MGTIFACE=$(grep $MGR /custom-installation/confluent/confluent.info | grep ^EXTMGRINFO: | head -n 1 | awk -F'|' '{print $2}') oum=$(umask) -umask 077 -chroot . custom-installation/confluent/bin/clortho $NODENAME $MGR > /root/custom-installation/confluent/confluent.apikey -MGR=[$MGR] deploycfg=/root/custom-installation/confluent/confluent.deploycfg +netcfgfile=$deploycfg +umask 077 +if [ -e /tmp/cnflnthmackeytmp ]; then + netcfgfile=/tmp/idnttmp + hmackeyfile=/tmp/cnflnthmackeytmp + #echo -n $(grep ^apitoken: /tmp/identdata/cnflnt.yml|awk '{print $2}') > $hmackeyfile + passfile=/tmp/cnflnttmppassfile + passcrypt=/tmp/cnflntcryptfile + hmacfile=/tmp/cnflnthmacfile + chroot . ln -sf /custom-installation/confluent/bin/clortho custom-installation/confluent/bin/genpasshmac + cp $hmackeyfile tmp + chroot . custom-installation/confluent/bin/genpasshmac $passfile $passcrypt $hmacfile $hmackeyfile + chroot . 
curl -f -H "CONFLUENT_NODENAME: $NODENAME" -H "CONFLUENT_CRYPTHMAC: $(cat /root/$hmacfile)" -d @/tmp/cnflntcryptfile https://$MGR/confluent-api/self/registerapikey + cp /root/$passfile /root/custom-installation/confluent/confluent.apikey + DEVICE=$(cat /tmp/autodetectnic) +else + chroot . custom-installation/confluent/bin/clortho $NODENAME $MGR > /root/custom-installation/confluent/confluent.apikey + MGR=[$MGR] + nic=$(grep ^MANAGER /custom-installation/confluent/confluent.info|grep fe80::|sed -e s/.*%//|head -n 1) + nic=$(ip link |grep ^$nic:|awk '{print $2}') + DEVICE=${nic%:} +fi if [ -z "$MGTIFACE" ]; then chroot . usr/bin/curl -f -H "CONFLUENT_NODENAME: $NODENAME" -H "CONFLUENT_APIKEY: $(cat /root//custom-installation/confluent/confluent.apikey)" https://${MGR}/confluent-api/self/deploycfg > $deploycfg else chroot . usr/bin/curl -f -H "CONFLUENT_MGTIFACE: $MGTIFACE" -H "CONFLUENT_NODENAME: $NODENAME" -H "CONFLUENT_APIKEY: $(cat /root//custom-installation/confluent/confluent.apikey)" https://${MGR}/confluent-api/self/deploycfg > $deploycfg fi umask $oum -nic=$(grep ^MANAGER /custom-installation/confluent/confluent.info|grep fe80::|sed -e s/.*%//|head -n 1) -nic=$(ip link |grep ^$nic:|awk '{print $2}') -DEVICE=${nic%:} ipv4m=$(grep ^ipv4_method $deploycfg|awk '{print$2}') . /scripts/functions if [ "$ipv4m" = "dhcp" ]; then diff --git a/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent index c4a6b204..ef09db40 100755 --- a/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu20.04/initramfs/scripts/init-premount/confluent @@ -6,7 +6,7 @@ mkdir -p /custom-installation cp -a /opt/confluent /custom-installation touch /custom-installation/confluent/confluent.info TRIES=5 -while [ ! -e /dev/disk ] && [ $TRIES -gt 0 ]; do +while [ ! 
-e /dev/disk/by-label ] && [ $TRIES -gt 0 ]; do sleep 2 TRIES=$((TRIES - 1)) done diff --git a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent index 1d47bcc0..03761f3a 100755 --- a/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu22.04/initramfs/scripts/init-premount/confluent @@ -6,7 +6,7 @@ mkdir -p /custom-installation cp -a /opt/confluent /custom-installation touch /custom-installation/confluent/confluent.info TRIES=5 -while [ ! -e /dev/disk ] && [ $TRIES -gt 0 ]; do +while [ ! -e /dev/disk/by-label ] && [ $TRIES -gt 0 ]; do sleep 2 TRIES=$((TRIES - 1)) done From ca2dcd2dbeeefce29afe55a36f4c6c08a0cdfcdc Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 11 Aug 2023 14:34:12 -0400 Subject: [PATCH 005/126] Reference identity network in ubuntu 20.04 When using identity image, also use identity image for network configuration. --- .../initramfs/custom-installation/hooks/casper-bottom.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/confluent_osdeploy/ubuntu20.04/initramfs/custom-installation/hooks/casper-bottom.sh b/confluent_osdeploy/ubuntu20.04/initramfs/custom-installation/hooks/casper-bottom.sh index a08a6b3d..91a13ca2 100755 --- a/confluent_osdeploy/ubuntu20.04/initramfs/custom-installation/hooks/casper-bottom.sh +++ b/confluent_osdeploy/ubuntu20.04/initramfs/custom-installation/hooks/casper-bottom.sh @@ -38,20 +38,20 @@ else chroot . usr/bin/curl -f -H "CONFLUENT_MGTIFACE: $MGTIFACE" -H "CONFLUENT_NODENAME: $NODENAME" -H "CONFLUENT_APIKEY: $(cat /root//custom-installation/confluent/confluent.apikey)" https://${MGR}/confluent-api/self/deploycfg > $deploycfg fi umask $oum -ipv4m=$(grep ^ipv4_method $deploycfg|awk '{print$2}') +ipv4m=$(grep ^ipv4_method $netcfgfile|awk '{print$2}') . 
/scripts/functions if [ "$ipv4m" = "dhcp" ]; then IP=dhcp configure_networking elif [ "$ipv4m" = "static" ]; then - v4addr=$(grep ^ipv4_address: $deploycfg) + v4addr=$(grep ^ipv4_address: $netcfgfile) v4addr=${v4addr#ipv4_address: } - v4gw=$(grep ^ipv4_gateway: $deploycfg) + v4gw=$(grep ^ipv4_gateway: $netcfgfile) v4gw=${v4gw#ipv4_gateway: } if [ "$v4gw" = "null" ]; then v4gw="" fi - v4nm=$(grep ipv4_netmask: $deploycfg) + v4nm=$(grep ipv4_netmask: $netcfgfile) v4nm=${v4nm#ipv4_netmask: } dnsdomain=$(grep ^dnsdomain: $deploycfg) dnsdomain=${dnsdomain#dnsdomain: } From 95c5253944bdd96cc3d4010200f7d13d69d95700 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 11 Aug 2023 15:01:55 -0400 Subject: [PATCH 006/126] Remove CIDR prefix length in ubuntu 20 deployment This produces invalid configuration for subiquity otherwise. --- .../initramfs/custom-installation/hooks/casper-bottom.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/ubuntu20.04/initramfs/custom-installation/hooks/casper-bottom.sh b/confluent_osdeploy/ubuntu20.04/initramfs/custom-installation/hooks/casper-bottom.sh index 91a13ca2..51d5c25c 100755 --- a/confluent_osdeploy/ubuntu20.04/initramfs/custom-installation/hooks/casper-bottom.sh +++ b/confluent_osdeploy/ubuntu20.04/initramfs/custom-installation/hooks/casper-bottom.sh @@ -44,7 +44,7 @@ if [ "$ipv4m" = "dhcp" ]; then IP=dhcp configure_networking elif [ "$ipv4m" = "static" ]; then - v4addr=$(grep ^ipv4_address: $netcfgfile) + v4addr=$(grep ^ipv4_address: $netcfgfile| sed -e 's!/.*!!') v4addr=${v4addr#ipv4_address: } v4gw=$(grep ^ipv4_gateway: $netcfgfile) v4gw=${v4gw#ipv4_gateway: } From 5068cda2cf3100c90324f3457fb1a3ef097ad049 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 14 Aug 2023 09:23:00 -0400 Subject: [PATCH 007/126] Catch another sort of exception for non-rpm distributions subprocess may produce a different sort of exception if rpm doesn't exist at all --- imgutil/imgutil | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/imgutil/imgutil b/imgutil/imgutil index 23d840e2..faaf21c7 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -1011,7 +1011,7 @@ def fingerprint_host_el(args, hostpath='/'): release = v elif k == 'Version': version = v - except subprocess.SubprocessError: + except (subprocess.SubprocessError, FileNotFoundError): return None if 'el8' not in release and 'el7' not in release and 'el9' not in release: return None From 9e070a14d49fea8e5d9dd5576d778b418c5d4598 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 15 Aug 2023 15:58:10 +0200 Subject: [PATCH 008/126] nodelist delimiters --- confluent_client/bin/nodelist | 13 ++++++++++--- confluent_client/doc/man/nodelist.ronn | 5 +++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/confluent_client/bin/nodelist b/confluent_client/bin/nodelist index b6892e53..c5815bc5 100755 --- a/confluent_client/bin/nodelist +++ b/confluent_client/bin/nodelist @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/libexec/platform-python # vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright 2015-2017 Lenovo @@ -15,13 +15,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__author__ = 'jjohnson2,alin37' +__author__ = 'jjohnson2,alin37,wgrzeda' import optparse import os import signal import sys + + try: signal.signal(signal.SIGPIPE, signal.SIG_DFL) except AttributeError: @@ -39,9 +41,12 @@ def main(): " or: %prog [options] noderange ...") argparser.add_option('-b', '--blame', action='store_true', help='Show information about how attributes inherited') + argparser.add_option('-d', '--delim', metavar="STRING", default = "\n", + help='Delimiter separating the values') (options, args) = argparser.parse_args() noderange="" nodelist="" + list = [] try: noderange = args[0] nodelist = '/noderange/{0}/nodes/'.format(noderange) @@ -61,7 +66,9 @@ def main(): sys.stderr.write(res['error'] + '\n') exitcode = 1 else: - print(res['item']['href'].replace('/', '')) + elem=(res['item']['href'].replace('/', '')) + list.append(elem) + print(*list, sep = options.delim) sys.exit(exitcode) diff --git a/confluent_client/doc/man/nodelist.ronn b/confluent_client/doc/man/nodelist.ronn index d2fc5ff5..bfe5a86b 100644 --- a/confluent_client/doc/man/nodelist.ronn +++ b/confluent_client/doc/man/nodelist.ronn @@ -4,7 +4,7 @@ nodelist(8) -- List confluent nodes and their attributes ## SYNOPSIS `nodelist ` -`nodelist [-b] ...` +`nodelist [-b] [-d] {string} ...` ## DESCRIPTION @@ -24,7 +24,8 @@ all attributes that begin with `net.` and end with `switch`. * `-b`, `--blame`: Annotate inherited and expression based attributes to show their base value. - +* `-d`, `--delim`: + Choose a delimiter to separat the values. Default - ENTER. 
## EXAMPLES * Listing matching nodes of a simple noderange: `# nodelist n1-n4` From c7a323b37034a79e390f30565c3a517ee5eb30d6 Mon Sep 17 00:00:00 2001 From: weragrzeda Date: Tue, 15 Aug 2023 16:31:38 +0200 Subject: [PATCH 009/126] nodelist delimiters fixed for py2 compatibility --- confluent_client/bin/nodelist | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_client/bin/nodelist b/confluent_client/bin/nodelist index c5815bc5..462ed922 100755 --- a/confluent_client/bin/nodelist +++ b/confluent_client/bin/nodelist @@ -68,7 +68,7 @@ def main(): else: elem=(res['item']['href'].replace('/', '')) list.append(elem) - print(*list, sep = options.delim) + print(options.delim.join(list)) sys.exit(exitcode) From 84d23bb1fd3e986b7508dc075e65515047b52f5f Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 16 Aug 2023 12:00:50 -0400 Subject: [PATCH 010/126] Begin work on Ubuntu cloning --- .../initramfs/scripts/init-premount/confluent | 2 +- imgutil/imgutil | 36 +++++++++++++++---- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/confluent_osdeploy/ubuntu20.04-diskless/initramfs/scripts/init-premount/confluent b/confluent_osdeploy/ubuntu20.04-diskless/initramfs/scripts/init-premount/confluent index 583ffd9e..2f7094b9 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/initramfs/scripts/init-premount/confluent +++ b/confluent_osdeploy/ubuntu20.04-diskless/initramfs/scripts/init-premount/confluent @@ -107,7 +107,7 @@ if [ "$v6meth" = static ]; then ip route add default via $v6gw fi fi -v4meth=$(grep ^ipv6_method: /etc/confluent/confluent.deploycfg|awk '{print $2}') +v4meth=$(grep ^ipv4_method: /etc/confluent/confluent.deploycfg|awk '{print $2}') if [ "$v4meth" = static ]; then v4addr=$(grep ^ipv4_address: /etc/confluent/confluent.deploycfg | awk '{print $2}') v4prefix=$(grep ^prefix: /etc/confluent/confluent.deploycfg | awk '{print $2}') diff --git a/imgutil/imgutil b/imgutil/imgutil index faaf21c7..389421c8 100644 --- a/imgutil/imgutil 
+++ b/imgutil/imgutil @@ -142,10 +142,26 @@ def capture_fs(args): subprocess.check_call(['mksquashfs', '/run/imgutil/capin', fname + '.sfs', '-comp', 'xz']) def capture_local_cleanup(): - shutil.rmtree('/usr/lib/dracut/modules.d/97confluent') + try: + shutil.rmtree('/usr/lib/dracut/modules.d/97confluent') + except Exception: + pass subprocess.check_call(['umount', '/run/imgutil/capout']) def build_boot_tree(targpath): + if glob.glob('/usr/lib/dracut/modules.d/97confluent/install*'): + return build_el_boot_tree(targpath) + elif glob.glob('/etc/initramfs-tools/'): + return build_deb_boot_tree(targpath) + +def build_deb_boot_tree(targpath): + kver = os.uname().release + mkdirp(os.path.join(targpath, 'boot/initramfs/')) + subprocess.check_call(['mkinitramfs', '-o', os.path.join(targpath, 'boot/initramfs/distribution')]) + shutil.copy2('/boot/vmlinuz-{}'.format(kver), os.path.join(targpath, 'boot/kernel')) + gather_bootloader(targpath) + +def build_el_boot_tree(targpath): for dscript in glob.glob('/usr/lib/dracut/modules.d/97confluent/install*'): os.chmod(dscript, 0o755) kver = os.uname().release @@ -168,19 +184,25 @@ def capture_remote(args): # with here locally, # another that is remotely called to gather target profile info # and a third that is exclusive to pack_image for diskless mode - utillib = __file__.replace('bin/imgutil', 'lib/imgutil') - utillib = os.path.join(utillib, 'el8/dracut/') subprocess.check_call(['ssh', targ, 'mkdir', '-p', '/run/imgutil/capenv']) subprocess.check_call(['rsync', __file__, '{0}:/run/imgutil/capenv/'.format(targ)]) finfo = subprocess.check_output(['ssh', targ, 'python3', '/run/imgutil/capenv/imgutil', 'getfingerprint']).decode('utf8') finfo = json.loads(finfo) - if finfo['oscategory'] not in ('el8', 'el9'): + if finfo['oscategory'] not in ('el8', 'el9', 'ubuntu20.04', 'ubuntu22.04'): raise Exception('Not yet supported for capture: ' + repr(finfo)) oscat = finfo['oscategory'] subprocess.check_call(['ssh', '-o', 'LogLevel=QUIET', '-t', 
targ, 'python3', '/run/imgutil/capenv/imgutil', 'capturelocal']) - utillib = __file__.replace('bin/imgutil', 'lib/imgutil') - utillib = os.path.join(utillib, '{}/dracut/'.format(oscat)) - subprocess.check_call(['rsync', '-a', utillib, '{0}:/usr/lib/dracut/modules.d/97confluent'.format(targ)]) + utillib = __file__.replace('bin/imgutil', 'lib/imgutil') + if oscat.startswith('ubuntu'): + utillib = os.path.join(utillib, '{}/initramfs-tools/'.format(oscat)) + if not os.path.exists(utillib): + raise Exception('Not yet supported for capture: ' + repr(finfo)) + subprocess.check_call(['rsync', '-a', utillib, '{0}:/etc/initramfs-tools'.format(targ)]) + else: + utillib = os.path.join(utillib, '{}/dracut/'.format(oscat)) + if not os.path.exists(utillib): + raise Exception('Not yet supported for capture: ' + repr(finfo)) + subprocess.check_call(['rsync', '-a', utillib, '{0}:/usr/lib/dracut/modules.d/97confluent'.format(targ)]) sys.stdout.write('Generating deployment initramfs...') sys.stdout.flush() subprocess.check_call(['ssh', '-o', 'LogLevel=QUIET', '-t', targ, 'python3', '/run/imgutil/capenv/imgutil', 'capturelocalboot']) From 58cc9840b35ba1347fa85dc7ee2fac24863d2418 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 16 Aug 2023 14:28:10 -0400 Subject: [PATCH 011/126] Prune out any netplan configuration, if exists To mitigate chance of network configuration being tanked by image based net config, mask it when capturing. 
--- imgutil/imgutil | 1 + 1 file changed, 1 insertion(+) diff --git a/imgutil/imgutil b/imgutil/imgutil index 389421c8..839e5e98 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -139,6 +139,7 @@ def capture_fs(args): masker.mask('/etc/ssh/*key') masker.mask('/etc/pki/tls/private/*') masker.mask('/root/.ssh/id_*') + masker.mask('/etc/netplan/*.yaml') subprocess.check_call(['mksquashfs', '/run/imgutil/capin', fname + '.sfs', '-comp', 'xz']) def capture_local_cleanup(): From b27542ec0008140c02e79a83e6500c3970ec4ff2 Mon Sep 17 00:00:00 2001 From: Simon Thompson Date: Thu, 17 Aug 2023 10:30:36 +0200 Subject: [PATCH 012/126] Confignet does not set autoconnect with network manager so disk installed images do not bring up interface on reboot --- confluent_osdeploy/common/profile/scripts/confignet | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index b2cee959..e7ecc3fa 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -253,6 +253,7 @@ class NetworkManager(object): def apply_configuration(self, cfg): cmdargs = {} + cmdargs['connection.autoconnect'] = 'yes' stgs = cfg['settings'] cmdargs['ipv6.method'] = stgs.get('ipv6_method', 'link-local') if stgs.get('ipv6_address', None): From bc7dbeebea6e4ac240c76d3db94dac38abb274f5 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 17 Aug 2023 11:16:32 -0400 Subject: [PATCH 013/126] Add ubuntu 22.04 diskless --- confluent_osdeploy/ubuntu22.04-diskless | 1 + 1 file changed, 1 insertion(+) create mode 120000 confluent_osdeploy/ubuntu22.04-diskless diff --git a/confluent_osdeploy/ubuntu22.04-diskless b/confluent_osdeploy/ubuntu22.04-diskless new file mode 120000 index 00000000..00822b05 --- /dev/null +++ b/confluent_osdeploy/ubuntu22.04-diskless @@ -0,0 +1 @@ +ubuntu20.04-diskless \ No newline at end of file From 
53d2b873a21c6e2f575eabf78a5a6a69cc5e2e63 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 17 Aug 2023 11:28:29 -0400 Subject: [PATCH 014/126] Package ubuntu 22.04 diskless --- confluent_osdeploy/confluent_osdeploy.spec.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/confluent_osdeploy.spec.tmpl b/confluent_osdeploy/confluent_osdeploy.spec.tmpl index 37cbaa8f..d939a0c3 100644 --- a/confluent_osdeploy/confluent_osdeploy.spec.tmpl +++ b/confluent_osdeploy/confluent_osdeploy.spec.tmpl @@ -42,7 +42,7 @@ for os in rhvh4 el7 genesis el8 suse15 ubuntu18.04 ubuntu20.04 ubuntu22.04 coreo mv ../addons.cpio . cd .. done -for os in el7 el8 suse15 el9 ubuntu20.04; do +for os in el7 el8 suse15 el9 ubuntu20.04 ubuntu22.04; do mkdir ${os}disklessout cd ${os}disklessout if [ -d ../${os}bin ]; then From 8ddcf45e1d5d605f22198d0ac16539cd985e2f7a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 17 Aug 2023 14:03:55 -0400 Subject: [PATCH 015/126] Create links for ubuntu20.04 and ubuntu22.04 --- imgutil/ubuntu20.04 | 1 + imgutil/ubuntu22.04 | 1 + 2 files changed, 2 insertions(+) create mode 120000 imgutil/ubuntu20.04 create mode 120000 imgutil/ubuntu22.04 diff --git a/imgutil/ubuntu20.04 b/imgutil/ubuntu20.04 new file mode 120000 index 00000000..7d13753d --- /dev/null +++ b/imgutil/ubuntu20.04 @@ -0,0 +1 @@ +ubuntu \ No newline at end of file diff --git a/imgutil/ubuntu22.04 b/imgutil/ubuntu22.04 new file mode 120000 index 00000000..7d13753d --- /dev/null +++ b/imgutil/ubuntu22.04 @@ -0,0 +1 @@ +ubuntu \ No newline at end of file From c47066d713707b1d0e42bbf118931cb50d6f62b1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 17 Aug 2023 14:11:41 -0400 Subject: [PATCH 016/126] Include ubuntu material in packaging --- imgutil/confluent_imgutil.spec.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imgutil/confluent_imgutil.spec.tmpl b/imgutil/confluent_imgutil.spec.tmpl index fcf0640a..35ed4070 100644 --- 
a/imgutil/confluent_imgutil.spec.tmpl +++ b/imgutil/confluent_imgutil.spec.tmpl @@ -34,7 +34,7 @@ mkdir -p opt/confluent/lib/imgutil mkdir -p opt/confluent/bin mv imgutil opt/confluent/bin/ chmod a+x opt/confluent/bin/imgutil -mv ubuntu suse15 el7 el9 el8 opt/confluent/lib/imgutil/ +mv ubuntu* suse15 el7 el9 el8 opt/confluent/lib/imgutil/ mkdir -p opt/confluent/share/licenses/confluent_imgutil cp LICENSE opt/confluent/share/licenses/confluent_imgutil From b88ccc292c59e4b976f7ab598dae7e92185c3d3b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 17 Aug 2023 14:36:38 -0400 Subject: [PATCH 017/126] Avoid deprecation on distutils with newer python Unfortunately, python 3.6 needs the distutils version, but we should avoid it altogether by python 3.10, but by 3.10 the shutil.copytree can do it. --- imgutil/imgutil | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/imgutil/imgutil b/imgutil/imgutil index 839e5e98..5f64c988 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -3,7 +3,13 @@ import configparser import ctypes import ctypes.util import datetime -from distutils.dir_util import copy_tree +import inspect +from shutil import copytree as copytree +if 'dirs_exist_ok' in inspect.getargspec(copytree).args: + def copy_tree(src, dst): + copytree(src, dst, dirs_exist_ok=True) +else: + from distutils.dir_util import copy_tree import glob import json import argparse From 7051f467bbb8a2b586a8c769551004767cebc9d5 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 17 Aug 2023 14:49:44 -0400 Subject: [PATCH 018/126] Avoid a deprecationwarning in copytree handling While trying to address one deprecationwarning, we hit another. Check if the new function exists and use it, falling back to distutils if everything fails. 
--- imgutil/imgutil | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imgutil/imgutil b/imgutil/imgutil index 5f64c988..887086d5 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -5,7 +5,7 @@ import ctypes.util import datetime import inspect from shutil import copytree as copytree -if 'dirs_exist_ok' in inspect.getargspec(copytree).args: +if hasattr(inspect, 'getfullargspec') and 'dirs_exist_ok' in inspect.getfullargspec(copytree).args: def copy_tree(src, dst): copytree(src, dst, dirs_exist_ok=True) else: From 7e209e412ab8be29612d938819d50a266899d35c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 17 Aug 2023 16:16:36 -0400 Subject: [PATCH 019/126] Make confluent hook executable in ubuntu diskless --- imgutil/imgutil | 1 + 1 file changed, 1 insertion(+) diff --git a/imgutil/imgutil b/imgutil/imgutil index 887086d5..28208e6a 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -205,6 +205,7 @@ def capture_remote(args): if not os.path.exists(utillib): raise Exception('Not yet supported for capture: ' + repr(finfo)) subprocess.check_call(['rsync', '-a', utillib, '{0}:/etc/initramfs-tools'.format(targ)]) + subprocess.check_call(['ssh', '-o', 'LogLevel=QUIET', '-t', targ, 'chmod', '+x', '/etc/initramfs-tools/hook/confluent']) else: utillib = os.path.join(utillib, '{}/dracut/'.format(oscat)) if not os.path.exists(utillib): From 899ce7f055162377ed7449e853df716247f92e17 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 17 Aug 2023 16:27:45 -0400 Subject: [PATCH 020/126] Correct spelling of hooks directory in Ubuntu cloning --- imgutil/imgutil | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imgutil/imgutil b/imgutil/imgutil index 28208e6a..0a9d4756 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -205,7 +205,7 @@ def capture_remote(args): if not os.path.exists(utillib): raise Exception('Not yet supported for capture: ' + repr(finfo)) subprocess.check_call(['rsync', '-a', utillib, 
'{0}:/etc/initramfs-tools'.format(targ)]) - subprocess.check_call(['ssh', '-o', 'LogLevel=QUIET', '-t', targ, 'chmod', '+x', '/etc/initramfs-tools/hook/confluent']) + subprocess.check_call(['ssh', '-o', 'LogLevel=QUIET', '-t', targ, 'chmod', '+x', '/etc/initramfs-tools/hooks/confluent']) else: utillib = os.path.join(utillib, '{}/dracut/'.format(oscat)) if not os.path.exists(utillib): From b14b34bdbdb2ef8d351a9e712ec8957628617b28 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 22 Aug 2023 12:28:07 -0400 Subject: [PATCH 021/126] Add limited sensor support for Eaton PDUs --- .../plugins/hardwaremanagement/eatonpdu.py | 70 ++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/eatonpdu.py b/confluent_server/confluent/plugins/hardwaremanagement/eatonpdu.py index fea6338c..16be5b38 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/eatonpdu.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/eatonpdu.py @@ -25,6 +25,10 @@ import hashlib import json import time +def simplify_name(name): + return name.lower().replace(' ', '_').replace('/', '-').replace( + '_-_', '-') + #eaton uses 'eval' rather than json, massage it to be valid json def sanitize_json(data): if not isinstance(data, str): @@ -131,6 +135,43 @@ class WebConnection(wc.SecureHTTPConnection): body = rsp.read() return body, rsp.status +_sensors_by_node = {} +def get_sensor_data(element, node, configmanager): + category, name = element[-2:] + justnames = False + readings = [] + if len(element) == 3: + # just get names + category = name + name = 'all' + justnames = True + if category in ('leds, fans', 'temperature'): + return + sn = _sensors_by_node.get(node, None) + if not sn or sn[1] < time.time(): + gc = PDUClient(node, configmanager) + try: + sdata = gc.get_sensor_data() + finally: + gc.logout() + _sensors_by_node[node] = [sdata, time.time() + 1] + sn = _sensors_by_node.get(node, None) + for outlet 
in sn[0]: + for sensename in sn[0][outlet]: + myname = 'Outlet {0} {1}'.format(outlet, sensename) + measurement = sn[0][outlet][sensename] + if name == 'all' or simplify_name(myname) == name: + readings.append({ + 'name': myname, + 'value': float(measurement['value']), + 'units': measurement['units'], + 'type': measurement['type'], + }) + if justnames: + for reading in readings: + yield msg.ChildCollection(simplify_name(reading['name'])) + else: + yield msg.SensorReadings(readings, name=node) class PDUClient(object): @@ -231,6 +272,28 @@ class PDUClient(object): if outdata[0] == outlet: return 'on' if outdata[3] else 'off' return + + def get_sensor_data(self): + rsp = self.do_request('cgi_pdu_outlets') + data = sanitize_json(rsp[0]) + data = json.loads(data) + data = data['data'][0] + sdata = {} + for outdata in data: + outsense = {} + outletname = outdata[0][0] + outsense['Energy'] = { + 'value': float(outdata[11] / 1000), + 'units': 'kwh', + 'type': 'Energy' + } + outsense['Power'] = { + 'value': float(outdata[4]), + 'units': 'w', + 'type': 'Power', + } + sdata[outletname] = outsense + return sdata def set_outlet(self, outlet, state): rsp = self.do_request('cgi_pdu_outlets') @@ -247,7 +310,12 @@ class PDUClient(object): idx += 1 def retrieve(nodes, element, configmanager, inputdata): - if 'outlets' not in element: + if element[0] == 'sensors': + for node in nodes: + for res in get_sensor_data(element, node, configmanager): + yield res + return + elif 'outlets' not in element: for node in nodes: yield msg.ConfluentResourceUnavailable(node, 'Not implemented') return From af4ca64da80631579ab82cb4292fa04370f66557 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 24 Aug 2023 10:53:00 -0400 Subject: [PATCH 022/126] Fix hotplug firmware to use rootfs after boot --- .../el7-diskless/profiles/default/scripts/imageboot.sh | 2 ++ .../el8-diskless/profiles/default/scripts/imageboot.sh | 2 ++ .../el9-diskless/profiles/default/scripts/imageboot.sh | 2 ++ 
.../ubuntu20.04-diskless/profiles/default/scripts/imageboot.sh | 2 ++ 4 files changed, 8 insertions(+) diff --git a/confluent_osdeploy/el7-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el7-diskless/profiles/default/scripts/imageboot.sh index 9940bc2e..4e81fe19 100644 --- a/confluent_osdeploy/el7-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el7-diskless/profiles/default/scripts/imageboot.sh @@ -125,4 +125,6 @@ if grep installtodisk /proc/cmdline > /dev/null; then fi mv /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs ln -s /sysroot/lib/modules/$(uname -r) /lib/modules/ +mv /lib/firmware /lib/firmware-ramfs +ln -s /sysroot/lib/firmware /lib/firmware exec /opt/confluent/bin/start_root diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh index 0bbf154b..ee2a8125 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh @@ -127,5 +127,7 @@ if grep installtodisk /proc/cmdline > /dev/null; then fi mv /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs ln -s /sysroot/lib/modules/$(uname -r) /lib/modules/ +mv /lib/firmware /lib/firmware-ramfs +ln -s /sysroot/lib/firmware /lib/firmware kill $(grep -l ^/usr/lib/systemd/systemd-udevd /proc/*/cmdline|cut -d/ -f 3) exec /opt/confluent/bin/start_root diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh index 0bbf154b..ee2a8125 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh @@ -127,5 +127,7 @@ if grep installtodisk /proc/cmdline > /dev/null; then fi mv /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs ln -s /sysroot/lib/modules/$(uname -r) /lib/modules/ +mv 
/lib/firmware /lib/firmware-ramfs +ln -s /sysroot/lib/firmware /lib/firmware kill $(grep -l ^/usr/lib/systemd/systemd-udevd /proc/*/cmdline|cut -d/ -f 3) exec /opt/confluent/bin/start_root diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/imageboot.sh index ce669376..0e1b68df 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/imageboot.sh @@ -134,4 +134,6 @@ if grep installtodisk /proc/cmdline > /dev/null; then fi mv /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs ln -s /sysroot/lib/modules/$(uname -r) /lib/modules/ +mv /lib/firmware /lib/firmware-ramfs +ln -s /sysroot/lib/firmware /lib/firmware exec /opt/confluent/bin/start_root From 85375cc733c790f5cde5c91732aa695b799fa8a4 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 24 Aug 2023 10:53:00 -0400 Subject: [PATCH 023/126] Fix hotplug firmware to use rootfs after boot --- .../el7-diskless/profiles/default/scripts/imageboot.sh | 2 ++ .../el8-diskless/profiles/default/scripts/imageboot.sh | 2 ++ .../el9-diskless/profiles/default/scripts/imageboot.sh | 2 ++ .../ubuntu20.04-diskless/profiles/default/scripts/imageboot.sh | 2 ++ 4 files changed, 8 insertions(+) diff --git a/confluent_osdeploy/el7-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el7-diskless/profiles/default/scripts/imageboot.sh index 9940bc2e..4e81fe19 100644 --- a/confluent_osdeploy/el7-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el7-diskless/profiles/default/scripts/imageboot.sh @@ -125,4 +125,6 @@ if grep installtodisk /proc/cmdline > /dev/null; then fi mv /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs ln -s /sysroot/lib/modules/$(uname -r) /lib/modules/ +mv /lib/firmware /lib/firmware-ramfs +ln -s /sysroot/lib/firmware /lib/firmware exec 
/opt/confluent/bin/start_root diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh index 0bbf154b..ee2a8125 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh @@ -127,5 +127,7 @@ if grep installtodisk /proc/cmdline > /dev/null; then fi mv /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs ln -s /sysroot/lib/modules/$(uname -r) /lib/modules/ +mv /lib/firmware /lib/firmware-ramfs +ln -s /sysroot/lib/firmware /lib/firmware kill $(grep -l ^/usr/lib/systemd/systemd-udevd /proc/*/cmdline|cut -d/ -f 3) exec /opt/confluent/bin/start_root diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh index 0bbf154b..ee2a8125 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh @@ -127,5 +127,7 @@ if grep installtodisk /proc/cmdline > /dev/null; then fi mv /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs ln -s /sysroot/lib/modules/$(uname -r) /lib/modules/ +mv /lib/firmware /lib/firmware-ramfs +ln -s /sysroot/lib/firmware /lib/firmware kill $(grep -l ^/usr/lib/systemd/systemd-udevd /proc/*/cmdline|cut -d/ -f 3) exec /opt/confluent/bin/start_root diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/imageboot.sh index ce669376..0e1b68df 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/imageboot.sh @@ -134,4 +134,6 @@ if grep installtodisk /proc/cmdline > /dev/null; then fi mv /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs ln -s 
/sysroot/lib/modules/$(uname -r) /lib/modules/ +mv /lib/firmware /lib/firmware-ramfs +ln -s /sysroot/lib/firmware /lib/firmware exec /opt/confluent/bin/start_root From 22cb2bdc401954b3f96d9125ac42a13e1853785d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 29 Aug 2023 10:57:25 -0400 Subject: [PATCH 024/126] Handle Ubuntu hardcoded grub cfg Ubuntu hardcodes grub.cfg to another location. Make a stub file as a flag to guide osimage to know where grub.cfg goes. --- confluent_server/confluent/osimage.py | 15 ++++++++++++++- imgutil/imgutil | 4 ++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/osimage.py b/confluent_server/confluent/osimage.py index b1643135..8884e0e9 100644 --- a/confluent_server/confluent/osimage.py +++ b/confluent_server/confluent/osimage.py @@ -2,6 +2,7 @@ import eventlet import eventlet.green.select as select import eventlet.green.subprocess as subprocess +from fnmatch import fnmatch import glob import logging logging.getLogger('libarchive').addHandler(logging.NullHandler()) @@ -153,6 +154,14 @@ def update_boot_esxi(profiledir, profile, label): '{0}/boot.img'.format(profiledir), profname], preexec_fn=relax_umask) +def find_glob(loc, fileglob): + for cdir, _, fs in os.walk(loc): + for f in fs: + if fnmatch(f, fileglob): + return os.path.join(cdir, f) + return None + + def update_boot_linux(profiledir, profile, label): profname = os.path.basename(profiledir) kernelargs = profile.get('kernelargs', '') @@ -170,7 +179,11 @@ def update_boot_linux(profiledir, profile, label): for initramfs in initrds: grubcfg += " /initramfs/{0}".format(initramfs) grubcfg += "\n}\n" - with open(profiledir + '/boot/efi/boot/grub.cfg', 'w') as grubout: + # well need to honor grubprefix path if different + grubcfgpath = find_glob(profiledir + '/boot', 'grub.cfg') + if not grubcfgpath: + grubcfgpath = profiledir + '/boot/efi/boot/grub.cfg' + with open(grubcfgpath, 'w') as grubout: grubout.write(grubcfg) ipxeargs = kernelargs 
for initramfs in initrds: diff --git a/imgutil/imgutil b/imgutil/imgutil index 0a9d4756..b683b1e5 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -1378,6 +1378,10 @@ def gather_bootloader(outdir, rootpath='/'): grubs = glob.glob(grubs) if len(grubs) == 1: grubbin = grubs[0] + if 'ubuntu' in grubbin: # we needd to store a hint that this grub has a different hard coded prefix + mkdirp(os.path.join(outdir, 'boot/EFI/ubuntu/')) + with open(os.path.join(outdir, 'boot/EFI/ubuntu/grub.cfg'), 'w') as wo: + wo.write('') shutil.copyfile(grubbin, os.path.join(outdir, 'boot/efi/boot/grubx64.efi')) shutil.copyfile(grubbin, os.path.join(outdir, 'boot/efi/boot/grub.efi')) From 1f32ef2310fe57f8e7bddea0ab43f1fd95fbba6d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 1 Sep 2023 10:07:51 -0400 Subject: [PATCH 025/126] Implement basic netplan support Start with single links, ipv4 only for ubuntu. --- .../common/profile/scripts/confignet | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index e7ecc3fa..f44e6f4a 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -8,6 +8,10 @@ import sys import time import shlex import subprocess +try: + import yaml +except ImportError: + pass try: from importlib.machinery import SourceFileLoader def load_source(mod, path): @@ -107,6 +111,80 @@ def get_interface_name(iname, settings): return iname return None +class NetplanManager(object): + def __init__(self): + self.cfgbydev = {} + self.read_connections() + + def read_connections(self): + for plan in glob.glob('/etc/netplan/*.y*ml'): + with open(plan) as planfile: + planinfo = yaml.safe_load(planfile) + if not planinfo: + continue + nicinfo = planinfo.get('network', {}).get('ethernets', {}) + for devname in nicinfo: + if devname == 'lo': + continue + if 'gateway4' in nicinfo[devname]: + # 
normalize deprecated syntax on read in + gw4 = nicinfo[devname]['gateway4'] + del nicinfo[devname]['gateway4'] + routeinfo = nicinfo[devname].get('routes', []) + for ri in routeinfo: + if ri.get('via', None) == gw4 and ri.get('to', None) in ('default', '0.0.0.0/0', '0/0'): + break + else: + routeinfo.append({ + 'to': 'default', + 'via': gw4 + }) + nicinfo[devname]['routes'] = routeinfo + self.cfgbydev[devname] = nicinfo[devname] + + def apply_configuration(self, cfg): + devnames = cfg['interfaces'] + if len(devnames) != 1: + raise Exception('Multi-nic team/bonds not yet supported') + stgs = cfg['settings'] + needcfgapply = False + for devname in devnames: + needcfgwrite = False + if stgs['ipv4_method'] == 'static': + curraddr = stgs['ipv4_address'] + currips = self.getcfgarrpath([devname, 'addresses']) + if curraddr not in currips: + needcfgwrite = True + currips.append(curraddr) + gwaddr = stgs.get('ipv4_gateway', None) + if gwaddr: + cfgroutes = self.getcfgarrpath([devname, 'routes']) + for rinfo in cfgroutes: + if rinfo.get('via', None) == gwaddr: + break + else: + needcfgwrite = True + cfgroutes.append({'via': gwaddr, 'to': 'default'}) + if needcfgwrite: + needcfgaply = True + newcfg = {'network': {'version': 2, 'ethernets': {devname: self.cfgbydev[devname]}}} + with open('/etc/netplan/{0}-confluentcfg.yaml'.format(devname), 'w') as planout: + planout.write(yaml.dump(newcfg)) + if needcfgapply: + subprocess.call(['netplan', 'apply']) + + def getcfgarrpath(self, devpath): + currptr = self.cfgbydev + for k in devpath[:-1]: + if k not in currptr: + currptr[k] = {} + currptr = currptr[k] + if devpath[-1] not in currptr: + currptr[devpath[-1]] = [] + return currptr[devpath[-1]] + + + class WickedManager(object): def __init__(self): self.teamidx = 0 @@ -360,6 +438,8 @@ if __name__ == '__main__': if not netname_to_interfaces['default']['interfaces']: del netname_to_interfaces['default'] rm_tmp_llas(tmpllas) + if os.path.exists('/usr/sbin/netplan'): + nm = 
NetplanManager() if os.path.exists('/usr/bin/nmcli'): nm = NetworkManager(devtypes) elif os.path.exists('/usr/sbin/wicked'): From 6ab91b50af315d807a211d1834de41c70e45c7a6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 1 Sep 2023 10:55:18 -0400 Subject: [PATCH 026/126] Add missing bits of Ubuntu diskless --- .../default/scripts/firstboot.service | 11 + .../profiles/default/scripts/firstboot.sh | 42 ++ .../profiles/default/scripts/getinstalldisk | 93 ++++ .../profiles/default/scripts/image2disk.py | 419 ++++++++++++++++++ .../profiles/default/scripts/imageboot.sh | 8 +- .../profiles/default/scripts/installimage | 46 ++ 6 files changed, 615 insertions(+), 4 deletions(-) create mode 100644 confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/firstboot.service create mode 100644 confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/firstboot.sh create mode 100644 confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk create mode 100644 confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py create mode 100644 confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/installimage diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/firstboot.service b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/firstboot.service new file mode 100644 index 00000000..209a95e6 --- /dev/null +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/firstboot.service @@ -0,0 +1,11 @@ +[Unit] +Description=First Boot Process +Requires=network-online.target +After=network-online.target + +[Service] +ExecStart=/opt/confluent/bin/firstboot.sh + +[Install] +WantedBy=multi-user.target + diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/firstboot.sh b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/firstboot.sh new file mode 100644 index 00000000..3f38cb44 --- /dev/null +++ 
b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/firstboot.sh @@ -0,0 +1,42 @@ +#!/bin/sh + +# This script is executed on the first boot after install has +# completed. It is best to edit the middle of the file as +# noted below so custom commands are executed before +# the script notifies confluent that install is fully complete. + +HOME=$(getent passwd $(whoami)|cut -d: -f 6) +export HOME +nodename=$(grep ^NODENAME /etc/confluent/confluent.info|awk '{print $2}') +confluent_apikey=$(cat /etc/confluent/confluent.apikey) +confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg|awk '{print $2}') +confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') +export nodename confluent_mgr confluent_profile +. /etc/confluent/functions +( +exec >> /var/log/confluent/confluent-firstboot.log +exec 2>> /var/log/confluent/confluent-firstboot.log +chmod 600 /var/log/confluent/confluent-firstboot.log +while ! ping -c 1 $confluent_mgr >& /dev/null; do + sleep 1 +done + +if [ ! -f /etc/confluent/firstboot.ran ]; then + touch /etc/confluent/firstboot.ran + + cat /etc/confluent/tls/*.pem >> /etc/pki/tls/certs/ca-bundle.crt + + run_remote firstboot.custom + # Firstboot scripts may be placed into firstboot.d, e.g. firstboot.d/01-firstaction.sh, firstboot.d/02-secondaction.sh + run_remote_parts firstboot.d + + # Induce execution of remote configuration, e.g. ansible plays in ansible/firstboot.d/ + run_remote_config firstboot.d +fi + +curl -X POST -d 'status: complete' -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" https://$confluent_mgr/confluent-api/self/updatestatus +systemctl disable firstboot +rm /etc/systemd/system/firstboot.service +rm /etc/confluent/firstboot.ran +) & +tail --pid $! 
-F /var/log/confluent/confluent-firstboot.log > /dev/console diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk new file mode 100644 index 00000000..522aba00 --- /dev/null +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk @@ -0,0 +1,93 @@ +import subprocess +import os + +class DiskInfo(object): + def __init__(self, devname): + self.name = devname + self.wwn = None + self.path = None + self.model = '' + self.size = 0 + self.driver = None + self.mdcontainer = '' + devnode = '/dev/{0}'.format(devname) + qprop = subprocess.check_output( + ['udevadm', 'info', '--query=property', devnode]) + if not isinstance(qprop, str): + qprop = qprop.decode('utf8') + for prop in qprop.split('\n'): + if '=' not in prop: + continue + k, v = prop.split('=', 1) + if k == 'DEVTYPE' and v != 'disk': + raise Exception('Not a disk') + elif k == 'DM_NAME': + raise Exception('Device Mapper') + elif k == 'ID_MODEL': + self.model = v + elif k == 'DEVPATH': + self.path = v + elif k == 'ID_WWN': + self.wwn = v + elif k == 'MD_CONTAINER': + self.mdcontainer = v + attrs = subprocess.check_output(['udevadm', 'info', '-a', devnode]) + if not isinstance(attrs, str): + attrs = attrs.decode('utf8') + for attr in attrs.split('\n'): + if '==' not in attr: + continue + k, v = attr.split('==', 1) + k = k.strip() + if k == 'ATTRS{size}': + self.size = v.replace('"', '') + elif (k == 'DRIVERS' and not self.driver + and v not in ('"sd"', '""')): + self.driver = v.replace('"', '') + if not self.driver and 'imsm' not in self.mdcontainer: + raise Exception("No driver detected") + if os.path.exists('/sys/block/{0}/size'.format(self.name)): + with open('/sys/block/{0}/size'.format(self.name), 'r') as sizesrc: + self.size = int(sizesrc.read()) * 512 + if int(self.size) < 536870912: + raise Exception("Device too small for install") + + @property + def 
priority(self): + if self.model.lower() in ('m.2 nvme 2-bay raid kit', 'thinksystem_m.2_vd', 'thinksystem m.2', 'thinksystem_m.2'): + return 0 + if 'imsm' in self.mdcontainer: + return 1 + if self.driver == 'ahci': + return 2 + if self.driver.startswith('megaraid'): + return 3 + if self.driver.startswith('mpt'): + return 4 + return 99 + + def __repr__(self): + return repr({ + 'name': self.name, + 'path': self.path, + 'wwn': self.wwn, + 'driver': self.driver, + 'size': self.size, + 'model': self.model, + }) + + +def main(): + disks = [] + for disk in sorted(os.listdir('/sys/class/block')): + try: + disk = DiskInfo(disk) + disks.append(disk) + except Exception as e: + print("Skipping {0}: {1}".format(disk, str(e))) + nd = [x.name for x in sorted(disks, key=lambda x: x.priority)] + if nd: + open('/tmp/installdisk', 'w').write(nd[0]) + +if __name__ == '__main__': + main() diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py new file mode 100644 index 00000000..7371dcf1 --- /dev/null +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py @@ -0,0 +1,419 @@ +#!/usr/bin/python3 +import glob +import json +import os +import re +import time +import shutil +import socket +import stat +import struct +import sys +import subprocess + +bootuuid = None + +def get_next_part_meta(img, imgsize): + if img.tell() == imgsize: + return None + pathlen = struct.unpack('!H', img.read(2))[0] + mountpoint = img.read(pathlen).decode('utf8') + jsonlen = struct.unpack('!I', img.read(4))[0] + metadata = json.loads(img.read(jsonlen).decode('utf8')) + img.seek(16, 1) # skip the two 64-bit values we don't use, they are in json + nextlen = struct.unpack('!H', img.read(2))[0] + img.seek(nextlen, 1) # skip filesystem type + nextlen = struct.unpack('!H', img.read(2))[0] + img.seek(nextlen, 1) # skip orig devname (redundant with json) + nextlen = 
struct.unpack('!H', img.read(2))[0] + img.seek(nextlen, 1) # skip padding + nextlen = struct.unpack('!Q', img.read(8))[0] + img.seek(nextlen, 1) # go to next section + return metadata + +def get_multipart_image_meta(img): + img.seek(0, 2) + imgsize = img.tell() + img.seek(16) + seekamt = img.read(1) + img.seek(struct.unpack('B', seekamt)[0], 1) + partinfo = get_next_part_meta(img, imgsize) + while partinfo: + yield partinfo + partinfo = get_next_part_meta(img, imgsize) + +def get_image_metadata(imgpath): + with open(imgpath, 'rb') as img: + header = img.read(16) + if header == b'\x63\x7b\x9d\x26\xb7\xfd\x48\x30\x89\xf9\x11\xcf\x18\xfd\xff\xa1': + for md in get_multipart_image_meta(img): + yield md + else: + raise Exception('Installation from single part image not supported') + +class PartedRunner(): + def __init__(self, disk): + self.disk = disk + + def run(self, command, check=True): + command = command.split() + command = ['parted', '-a', 'optimal', '-s', self.disk] + command + if check: + return subprocess.check_output(command).decode('utf8') + else: + return subprocess.run(command, stdout=subprocess.PIPE).stdout.decode('utf8') + +def fixup(rootdir, vols): + devbymount = {} + for vol in vols: + devbymount[vol['mount']] = vol['targetdisk'] + fstabfile = os.path.join(rootdir, 'etc/fstab') + with open(fstabfile) as tfile: + fstab = tfile.read().split('\n') + while not fstab[0]: + fstab = fstab[1:] + if os.path.exists(os.path.join(rootdir, '.autorelabel')): + os.unlink(os.path.join(rootdir, '.autorelabel')) + with open(fstabfile, 'w') as tfile: + for tab in fstab: + entry = tab.split() + if tab.startswith('#ORIGFSTAB#'): + if entry[1] in devbymount: + targetdev = devbymount[entry[1]] + if targetdev.startswith('/dev/localstorage/'): + entry[0] = targetdev + else: + uuid = subprocess.check_output(['blkid', '-s', 'UUID', '-o', 'value', targetdev]).decode('utf8') + uuid = uuid.strip() + entry[0] = 'UUID={}'.format(uuid) + elif entry[2] == 'swap': + entry[0] = 
'/dev/mapper/localstorage-swap' + entry[0] = entry[0].ljust(42) + entry[1] = entry[1].ljust(16) + entry[3] = entry[3].ljust(28) + tab = '\t'.join(entry) + tfile.write(tab + '\n') + with open(os.path.join(rootdir, 'etc/hostname'), 'w') as nameout: + nameout.write(socket.gethostname() + '\n') + selinuxconfig = os.path.join(rootdir, 'etc/selinux/config') + policy = None + if os.path.exists(selinuxconfig): + with open(selinuxconfig) as cfgin: + sec = cfgin.read().split('\n') + for l in sec: + l = l.split('#', 1)[0] + if l.startswith('SELINUXTYPE='): + _, policy = l.split('=') + for sshkey in glob.glob(os.path.join(rootdir, 'etc/ssh/*_key*')): + os.unlink(sshkey) + for sshkey in glob.glob('/etc/ssh/*_key*'): + newkey = os.path.join(rootdir, sshkey[1:]) + shutil.copy2(sshkey, newkey) + finfo = os.stat(sshkey) + os.chown(newkey, finfo[stat.ST_UID], finfo[stat.ST_GID]) + + # Will use confignet to handle networking for ubuntu + shutil.rmtree(os.path.join(rootdir, 'etc/confluent/')) + shutil.copytree('/etc/confluent', os.path.join(rootdir, 'etc/confluent')) + if policy: + sys.stdout.write('Applying SELinux labeling...') + sys.stdout.flush() + subprocess.check_call(['setfiles', '-r', rootdir, os.path.join(rootdir, 'etc/selinux/{}/contexts/files/file_contexts'.format(policy)), os.path.join(rootdir, 'etc')]) + subprocess.check_call(['setfiles', '-r', rootdir, os.path.join(rootdir, 'etc/selinux/{}/contexts/files/file_contexts'.format(policy)), os.path.join(rootdir, 'opt')]) + sys.stdout.write('Done\n') + sys.stdout.flush() + for metafs in ('proc', 'sys', 'dev'): + subprocess.check_call(['mount', '-o', 'bind', '/{}'.format(metafs), os.path.join(rootdir, metafs)]) + if os.path.exists(os.path.join(rootdir, 'etc/lvm/devices/system.devices')): + os.remove(os.path.join(rootdir, 'etc/lvm/devices/system.devices')) + grubsyscfg = os.path.join(rootdir, 'etc/sysconfig/grub') + if not os.path.exists(grubsyscfg): + grubsyscfg = os.path.join(rootdir, 'etc/default/grub') + with 
open(grubsyscfg) as defgrubin: + defgrub = defgrubin.read().split('\n') + with open(grubsyscfg, 'w') as defgrubout: + for gline in defgrub: + gline = gline.split() + newline = [] + for ent in gline: + if ent.startswith('resume=') or ent.startswith('rd.lvm.lv'): + continue + newline.append(ent) + defgrubout.write(' '.join(newline) + '\n') + grubcfg = subprocess.check_output(['find', os.path.join(rootdir, 'boot'), '-name', 'grub.cfg']).decode('utf8').strip().replace(rootdir, '/').replace('//', '/') + grubcfg = grubcfg.split('\n') + if not grubcfg[-1]: + grubcfg = grubcfg[:-1] + if len(grubcfg) == 1: + grubcfg = grubcfg[0] + else: + for gcfg in grubcfg: + rgcfg = os.path.join(rootdir, gcfg[1:]) # gcfg has a leading / to get rid of + if os.stat(rgcfg).st_size > 256: + grubcfg = gcfg + else: + with open(rgcfg, 'r') as gin: + tgrubcfg = gin.read() + tgrubcfg = tgrubcfg.split('\n') + if 'search --no-floppy --fs-uuid --set=dev' in tgrubcfg[0]: + tgrubcfg[0] = 'search --no-floppy --fs-uuid --set=dev ' + bootuuid + elif 'search.fs_uuid ' in tgrubcfg[0] and 'root' in tgrubcfg[0]: + tgrubcfg[0] = 'search.fs_uuid ' + bootuuid + ' root' + with open(rgcfg, 'w') as gout: + for gcline in tgrubcfg: + gout.write(gcline) + gout.write('\n') + try: + # must fixup root@d2:/boot/efi/EFI# cat ubuntu/grub.cfg ... 
uuid + subprocess.check_call(['chroot', rootdir, 'grub-mkconfig', '-o', grubcfg]) + except Exception as e: + print(repr(e)) + print(rootdir) + print(grubcfg) + time.sleep(86400) + newroot = None + with open('/etc/shadow') as shadowin: + shents = shadowin.read().split('\n') + for shent in shents: + shent = shent.split(':') + if not shent: + continue + if shent[0] == 'root' and shent[1] not in ('*', '!!', ''): + newroot = shent[1] + if newroot: + shlines = None + with open(os.path.join(rootdir, 'etc/shadow')) as oshadow: + shlines = oshadow.read().split('\n') + with open(os.path.join(rootdir, 'etc/shadow'), 'w') as oshadow: + for line in shlines: + if line.startswith('root:'): + line = line.split(':') + line[1] = newroot + line = ':'.join(line) + oshadow.write(line + '\n') + partnum = None + targblock = None + for vol in vols: + if vol['mount'] == '/boot/efi': + targdev = vol['targetdisk'] + partnum = re.search('(\d+)$', targdev).group(1) + targblock = re.search('(.*)\d+$', targdev).group(1) + if targblock: + shimpath = subprocess.check_output(['find', os.path.join(rootdir, 'boot/efi'), '-name', 'shimx64.efi']).decode('utf8').strip() + shimpath = shimpath.replace(rootdir, '/').replace('/boot/efi', '').replace('//', '/').replace('/', '\\') + subprocess.check_call(['efibootmgr', '-c', '-d', targblock, '-l', shimpath, '--part', partnum]) + #other network interfaces + + +def had_swap(): + with open('/etc/fstab') as tabfile: + tabs = tabfile.read().split('\n') + for tab in tabs: + tab = tab.split() + if len(tab) < 3: + continue + if tab[2] == 'swap': + return True + return False + +def install_to_disk(imgpath): + global bootuuid + lvmvols = {} + deftotsize = 0 + mintotsize = 0 + deflvmsize = 0 + minlvmsize = 0 + biggestsize = 0 + biggestfs = None + plainvols = {} + allvols = [] + swapsize = 0 + if had_swap(): + with open('/proc/meminfo') as meminfo: + swapsize = meminfo.read().split('\n')[0] + swapsize = int(swapsize.split()[1]) + if swapsize < 2097152: + swapsize = 
swapsize * 2 + elif swapsize > 8388608 and swapsize < 67108864: + swapsize = swapsize * 0.5 + elif swapsize >= 67108864: + swapsize = 33554432 + swapsize = int(swapsize * 1024) + deftotsize = swapsize + mintotsize = swapsize + for fs in get_image_metadata(imgpath): + allvols.append(fs) + deftotsize += fs['initsize'] + mintotsize += fs['minsize'] + if fs['initsize'] > biggestsize: + biggestfs = fs + biggestsize = fs['initsize'] + if fs['device'].startswith('/dev/mapper'): + lvmvols[fs['device'].replace('/dev/mapper/', '')] = fs + deflvmsize += fs['initsize'] + minlvmsize += fs['minsize'] + else: + plainvols[int(re.search('(\d+)$', fs['device'])[0])] = fs + with open('/tmp/installdisk') as diskin: + instdisk = diskin.read() + instdisk = '/dev/' + instdisk + parted = PartedRunner(instdisk) + dinfo = parted.run('unit s print', check=False) + dinfo = dinfo.split('\n') + sectors = 0 + sectorsize = 0 + for inf in dinfo: + if inf.startswith('Disk {0}:'.format(instdisk)): + _, sectors = inf.split(': ') + sectors = int(sectors.replace('s', '')) + if inf.startswith('Sector size (logical/physical):'): + _, sectorsize = inf.split(':') + sectorsize = sectorsize.split('/')[0] + sectorsize = sectorsize.replace('B', '') + sectorsize = int(sectorsize) + # for now, only support resizing/growing the largest partition + minexcsize = deftotsize - biggestfs['initsize'] + mintotsize = deftotsize - biggestfs['initsize'] + biggestfs['minsize'] + minsectors = mintotsize // sectorsize + if sectors < (minsectors + 65536): + raise Exception('Disk too small to fit image') + biggestsectors = sectors - (minexcsize // sectorsize) + biggestsize = sectorsize * biggestsectors + parted.run('mklabel gpt') + curroffset = 2048 + for volidx in sorted(plainvols): + vol = plainvols[volidx] + if vol is not biggestfs: + size = vol['initsize'] // sectorsize + else: + size = biggestsize // sectorsize + size += 2047 - (size % 2048) + end = curroffset + size + if end > sectors: + end = sectors + parted.run('mkpart 
primary {}s {}s'.format(curroffset, end)) + vol['targetdisk'] = instdisk + '{0}'.format(volidx) + curroffset += size + 1 + if not lvmvols: + if swapsize: + swapsize = swapsize // sectorsize + swapsize += 2047 - (size % 2048) + end = curroffset + swapsize + if end > sectors: + end = sectors + parted.run('mkpart swap {}s {}s'.format(curroffset, end)) + subprocess.check_call(['mkswap', instdisk + '{}'.format(volidx + 1)]) + else: + parted.run('mkpart lvm {}s 100%'.format(curroffset)) + lvmpart = instdisk + '{}'.format(volidx + 1) + subprocess.check_call(['pvcreate', '-ff', '-y', lvmpart]) + subprocess.check_call(['vgcreate', 'localstorage', lvmpart]) + vginfo = subprocess.check_output(['vgdisplay', 'localstorage', '--units', 'b']).decode('utf8') + vginfo = vginfo.split('\n') + pesize = 0 + pes = 0 + for infline in vginfo: + infline = infline.split() + if len(infline) >= 3 and infline[:2] == ['PE', 'Size']: + pesize = int(infline[2]) + if len(infline) >= 5 and infline[:2] == ['Free', 'PE']: + pes = int(infline[4]) + takeaway = swapsize // pesize + for volidx in lvmvols: + vol = lvmvols[volidx] + if vol is biggestfs: + continue + takeaway += vol['initsize'] // pesize + takeaway += 1 + biggestextents = pes - takeaway + for volidx in lvmvols: + vol = lvmvols[volidx] + if vol is biggestfs: + extents = biggestextents + else: + extents = vol['initsize'] // pesize + extents += 1 + if vol['mount'] == '/': + lvname = 'root' + else: + lvname = vol['mount'].replace('/', '_') + subprocess.check_call(['lvcreate', '-l', '{}'.format(extents), '-y', '-n', lvname, 'localstorage']) + vol['targetdisk'] = '/dev/localstorage/{}'.format(lvname) + if swapsize: + subprocess.check_call(['lvcreate', '-y', '-l', '{}'.format(swapsize // pesize), '-n', 'swap', 'localstorage']) + subprocess.check_call(['mkswap', '/dev/localstorage/swap']) + os.makedirs('/run/imginst/targ') + for vol in allvols: + with open(vol['targetdisk'], 'wb') as partition: + partition.write(b'\x00' * 1 * 1024 * 1024) + 
subprocess.check_call(['mkfs.{}'.format(vol['filesystem']), vol['targetdisk']]) + subprocess.check_call(['mount', vol['targetdisk'], '/run/imginst/targ']) + source = vol['mount'].replace('/', '_') + source = '/run/imginst/sources/' + source + blankfsstat = os.statvfs('/run/imginst/targ') + blankused = (blankfsstat.f_blocks - blankfsstat.f_bfree) * blankfsstat.f_bsize + sys.stdout.write('\nWriting {0}: '.format(vol['mount'])) + with subprocess.Popen(['cp', '-ax', source + '/.', '/run/imginst/targ']) as copier: + stillrunning = copier.poll() + lastprogress = 0.0 + while stillrunning is None: + currfsstat = os.statvfs('/run/imginst/targ') + currused = (currfsstat.f_blocks - currfsstat.f_bfree) * currfsstat.f_bsize + currused -= blankused + with open('/proc/meminfo') as meminf: + for line in meminf.read().split('\n'): + if line.startswith('Dirty:'): + _, dirty, _ = line.split() + dirty = int(dirty) * 1024 + progress = (currused - dirty) / vol['minsize'] + if progress < lastprogress: + progress = lastprogress + if progress > 0.99: + progress = 0.99 + lastprogress = progress + progress = progress * 100 + sys.stdout.write('\x1b[1K\rWriting {0}: {1:3.2f}%'.format(vol['mount'], progress)) + sys.stdout.flush() + time.sleep(0.5) + stillrunning = copier.poll() + if stillrunning != 0: + raise Exception("Error copying volume") + with subprocess.Popen(['sync']) as syncrun: + stillrunning = syncrun.poll() + while stillrunning is None: + with open('/proc/meminfo') as meminf: + for line in meminf.read().split('\n'): + if line.startswith('Dirty:'): + _, dirty, _ = line.split() + dirty = int(dirty) * 1024 + progress = (vol['minsize'] - dirty) / vol['minsize'] + if progress < lastprogress: + progress = lastprogress + if progress > 0.99: + progress = 0.99 + lastprogress = progress + progress = progress * 100 + sys.stdout.write('\x1b[1K\rWriting {0}: {1:3.2f}%'.format(vol['mount'], progress)) + sys.stdout.flush() + time.sleep(0.5) + stillrunning = syncrun.poll() + 
sys.stdout.write('\x1b[1K\rDone writing {0}'.format(vol['mount'])) + sys.stdout.write('\n') + sys.stdout.flush() + if vol['mount'] == '/boot': + tbootuuid = subprocess.check_output(['blkid', vol['targetdisk']]) + if b'UUID="' in tbootuuid: + bootuuid = tbootuuid.split(b'UUID="', 1)[1].split(b'"')[0].decode('utf8') + + + + + subprocess.check_call(['umount', '/run/imginst/targ']) + for vol in allvols: + subprocess.check_call(['mount', vol['targetdisk'], '/run/imginst/targ/' + vol['mount']]) + fixup('/run/imginst/targ', allvols) + + +if __name__ == '__main__': + install_to_disk(os.environ['mountsrc']) + diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/imageboot.sh index 0e1b68df..f1b8e45a 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/imageboot.sh @@ -127,13 +127,13 @@ chmod +x /sysroot/opt/confluent/bin/onboot.sh cp /opt/confluent/bin/apiclient /sysroot/opt/confluent/bin ln -s /etc/systemd/system/onboot.service /sysroot/etc/systemd/system/multi-user.target.wants/onboot.service cp /etc/confluent/functions /sysroot/etc/confluent/functions +mv /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs +ln -s /sysroot/lib/modules/$(uname -r) /lib/modules/ +mv /lib/firmware /lib/firmware-ramfs +ln -s /sysroot/lib/firmware /lib/firmware if grep installtodisk /proc/cmdline > /dev/null; then . 
/etc/confluent/functions run_remote installimage exec reboot -f fi -mv /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs -ln -s /sysroot/lib/modules/$(uname -r) /lib/modules/ -mv /lib/firmware /lib/firmware-ramfs -ln -s /sysroot/lib/firmware /lib/firmware exec /opt/confluent/bin/start_root diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/installimage b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/installimage new file mode 100644 index 00000000..2e791ce6 --- /dev/null +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/installimage @@ -0,0 +1,46 @@ +#!/bin/bash +. /etc/confluent/functions +# the image will be used to deploy itself +# provide both access to image (for parsing metadata) +# and existing mounts of image (to take advantage of caching) +mount -o bind /sys /sysroot/sys +mount -o bind /dev /sysroot/dev +mount -o bind /proc /sysroot/proc +mount -o bind /run /sysroot/run + + +if [ ! -f /tmp/mountparts.sh ]; then + mkdir -p /sysroot/run/imginst/sources/_ + mount -o bind /mnt/remote /sysroot/run/imginst/sources/_ +else + for srcmount in $(cat /tmp/mountparts.sh | awk '{print $2}'); do + srcname=${srcmount#/dev/mapper/mproot} + srcdir=$(echo $srcmount | sed -e 's!/dev/mapper/mproot!/mnt/remote!' -e 's!_!/!g') + mkdir -p /sysroot/run/imginst/sources/$srcname + mount -o bind $srcdir /sysroot/run/imginst/sources/$srcname + done +fi +cd /sysroot/run +chroot /sysroot/ bash -c "source /etc/confluent/functions; run_remote_python getinstalldisk" +chroot /sysroot/ bash -c "source /etc/confluent/functions; run_remote_parts pre.d" +if [ ! -f /sysroot/tmp/installdisk ]; then + echo 'Unable to find a suitable installation target device, ssh to port 2222 to investigate' + while [ ! 
-f /sysroot/tmp/installdisk ]; do + sleep 1 + done +fi +lvm vgchange -a n +udevadm control -e +if [ -f /sysroot/etc/lvm/devices/system.devices ]; then + rm /sysroot/etc/lvm/devices/system.devices +fi +chroot /sysroot /usr/lib/systemd/systemd-udevd --daemon +chroot /sysroot bash -c "source /etc/confluent/functions; run_remote_python image2disk.py" +echo "Port 22" >> /etc/ssh/sshd_config +echo 'Match LocalPort 22' >> /etc/ssh/sshd_config +echo ' ChrootDirectory /sysroot/run/imginst/targ' >> /etc/ssh/sshd_config +kill -HUP $(cat /run/sshd.pid) + +chroot /sysroot/run/imginst/targ bash -c "source /etc/confluent/functions; run_remote post.sh" +chroot /sysroot bash -c "umount \$(tac /proc/mounts|awk '{print \$2}'|grep ^/run/imginst/targ)" + From 44ec390f400f862c9a04db2c120b3f08b66c54d7 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 1 Sep 2023 12:29:27 -0400 Subject: [PATCH 027/126] Add post.sh to ubuntu cloning --- .../profiles/default/scripts/post.sh | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/post.sh diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/post.sh b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/post.sh new file mode 100644 index 00000000..67af89ba --- /dev/null +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/post.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +# This script is executed 'chrooted' into a cloned disk target before rebooting +# + +nodename=$(grep ^NODENAME /etc/confluent/confluent.info|awk '{print $2}') +confluent_apikey=$(cat /etc/confluent/confluent.apikey) +confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg|awk '{print $2}') +confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') +export nodename confluent_mgr confluent_profile +. 
/etc/confluent/functions +mkdir -p /var/log/confluent +chmod 700 /var/log/confluent +exec >> /var/log/confluent/confluent-post.log +exec 2>> /var/log/confluent/confluent-post.log +chmod 600 /var/log/confluent/confluent-post.log +tail -f /var/log/confluent/confluent-post.log > /dev/console & +logshowpid=$! +curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/firstboot.service > /etc/systemd/system/firstboot.service +mkdir -p /opt/confluent/bin +curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/firstboot.sh > /opt/confluent/bin/firstboot.sh +chmod +x /opt/confluent/bin/firstboot.sh +systemctl enable firstboot +run_remote_python syncfileclient +run_remote_python confignet +run_remote post.custom +# post scripts may be placed into post.d, e.g. post.d/01-firstaction.sh, post.d/02-secondaction.sh +run_remote_parts post.d + +# Induce execution of remote configuration, e.g. ansible plays in ansible/post.d/ +run_remote_config post.d + +curl -sf -X POST -d 'status: staged' -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" https://$confluent_mgr/confluent-api/self/updatestatus + +kill $logshowpid + + From f6e658c341ac48afd1b542e05a4685791330f608 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 1 Sep 2023 13:25:20 -0400 Subject: [PATCH 028/126] Add site CA to ubuntu profiles on install --- .../ubuntu20.04-diskless/profiles/default/scripts/installimage | 2 ++ confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh | 2 ++ 2 files changed, 4 insertions(+) diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/installimage b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/installimage index 2e791ce6..84dded4e 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/installimage +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/installimage @@ -40,6 +40,8 @@ echo "Port 22" >> /etc/ssh/sshd_config echo 
'Match LocalPort 22' >> /etc/ssh/sshd_config echo ' ChrootDirectory /sysroot/run/imginst/targ' >> /etc/ssh/sshd_config kill -HUP $(cat /run/sshd.pid) +cat /tls/*.pem > /sysroot/run/imginst/targ/usr/local/share/ca-certificates/confluent.crt +chroot /sysroot/run/imginst/targ update-ca-certificates chroot /sysroot/run/imginst/targ bash -c "source /etc/confluent/functions; run_remote post.sh" chroot /sysroot bash -c "umount \$(tac /proc/mounts|awk '{print \$2}'|grep ^/run/imginst/targ)" diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh index 6c99735c..c4fefc3d 100755 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh @@ -74,7 +74,9 @@ if [ -e /sys/firmware/efi ]; then fi fi cat /target/etc/confluent/tls/*.pem > /target/etc/confluent/ca.pem +cat /target/etc/confluent/tls/*.pem > /target/usr/local/share/ca-certificates/confluent.crt cat /target/etc/confluent/tls/*.pem > /etc/confluent/ca.pem +chroot /target update-ca-certificates chroot /target bash -c "source /etc/confluent/functions; run_remote_python syncfileclient" chroot /target bash -c "source /etc/confluent/functions; run_remote_parts post.d" source /target/etc/confluent/functions From abd5f4e9f2af698dd1afbc30f5114aa4df057e09 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 1 Sep 2023 13:51:02 -0400 Subject: [PATCH 029/126] Fix shebang in firstboot on ubuntu cloning --- .../ubuntu20.04-diskless/profiles/default/scripts/firstboot.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/firstboot.sh b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/firstboot.sh index 3f38cb44..68e212fd 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/firstboot.sh +++ 
b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/firstboot.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # This script is executed on the first boot after install has # completed. It is best to edit the middle of the file as From 2268d7f65ae1c8edbec90b8213ffa99734340095 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 1 Sep 2023 13:51:17 -0400 Subject: [PATCH 030/126] Fix hotplug firmware in suse diskless --- .../suse15-diskless/profiles/default/scripts/imageboot.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_osdeploy/suse15-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/suse15-diskless/profiles/default/scripts/imageboot.sh index 3182f972..91e62ebb 100644 --- a/confluent_osdeploy/suse15-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/suse15-diskless/profiles/default/scripts/imageboot.sh @@ -138,4 +138,6 @@ if grep installtodisk /proc/cmdline > /dev/null; then fi mv /lib/modules/$(uname -r) /lib/modules/$(uname -r)-ramfs ln -s /sysroot/lib/modules/$(uname -r) /lib/modules/ +mv /lib/firmware /lib/firmware-ramfs +ln -s /sysroot/lib/firmware /lib/firmware exec /opt/confluent/bin/start_root From fa60a9dc9fbf7b9090b58e71a07ea82a134d412b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 1 Sep 2023 16:34:30 -0400 Subject: [PATCH 031/126] Add IPv6 support to confignet --- .../common/profile/scripts/confignet | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index f44e6f4a..d01a9e45 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -150,23 +150,32 @@ class NetplanManager(object): needcfgapply = False for devname in devnames: needcfgwrite = False + if stgs['ipv6_method'] == 'static': + curraddr = stgs['ipv6_address'] + currips = self.getcfgarrpath([devname, 
'addresses']) + if curraddr not in currips: + needcfgwrite = True + currips.append(curraddr) if stgs['ipv4_method'] == 'static': curraddr = stgs['ipv4_address'] currips = self.getcfgarrpath([devname, 'addresses']) if curraddr not in currips: needcfgwrite = True currips.append(curraddr) - gwaddr = stgs.get('ipv4_gateway', None) - if gwaddr: - cfgroutes = self.getcfgarrpath([devname, 'routes']) - for rinfo in cfgroutes: - if rinfo.get('via', None) == gwaddr: - break - else: - needcfgwrite = True - cfgroutes.append({'via': gwaddr, 'to': 'default'}) + gws = [] + gws.append(stgs.get('ipv4_gateway', None)) + gws.append(stgs.get('ipv6_gateway', None)) + for gwaddr in gws: + if gwaddr: + cfgroutes = self.getcfgarrpath([devname, 'routes']) + for rinfo in cfgroutes: + if rinfo.get('via', None) == gwaddr: + break + else: + needcfgwrite = True + cfgroutes.append({'via': gwaddr, 'to': 'default'}) if needcfgwrite: - needcfgaply = True + needcfgapply = True newcfg = {'network': {'version': 2, 'ethernets': {devname: self.cfgbydev[devname]}}} with open('/etc/netplan/{0}-confluentcfg.yaml'.format(devname), 'w') as planout: planout.write(yaml.dump(newcfg)) From f16cf4387f85d227c82f11427eb65b0237be76fc Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 1 Sep 2023 16:40:02 -0400 Subject: [PATCH 032/126] Further Ubuntu enhancements Add confignet to Ubuntu 20 and 22 Add syncfile to the ubuntu diskless/cloning --- .../profiles/default/scripts/syncfileclient | 286 ++++++++++++++++++ .../profiles/default/scripts/post.sh | 3 + .../profiles/default/scripts/post.sh | 1 + 3 files changed, 290 insertions(+) create mode 100644 confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient new file mode 100644 index 00000000..f7d4c0b4 --- /dev/null +++ 
b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/syncfileclient @@ -0,0 +1,286 @@ +#!/usr/bin/python3 +import subprocess +import importlib +import tempfile +import json +import os +import shutil +import pwd +import grp +from importlib.machinery import SourceFileLoader +try: + apiclient = SourceFileLoader('apiclient', '/opt/confluent/bin/apiclient').load_module() +except FileNotFoundError: + apiclient = SourceFileLoader('apiclient', '/etc/confluent/apiclient').load_module() + + +def partitionhostsline(line): + comment = '' + try: + cmdidx = line.index('#') + comment = line[cmdidx:] + line = line[:cmdidx].strip() + except ValueError: + pass + if not line: + return '', [], comment + ipaddr, names = line.split(maxsplit=1) + names = names.split() + return ipaddr, names, comment + +class HostMerger(object): + def __init__(self): + self.byip = {} + self.byname = {} + self.sourcelines = [] + self.targlines = [] + + def read_source(self, sourcefile): + with open(sourcefile, 'r') as hfile: + self.sourcelines = hfile.read().split('\n') + while not self.sourcelines[-1]: + self.sourcelines = self.sourcelines[:-1] + for x in range(len(self.sourcelines)): + line = self.sourcelines[x] + currip, names, comment = partitionhostsline(line) + if currip: + self.byip[currip] = x + for name in names: + self.byname[name] = x + + def read_target(self, targetfile): + with open(targetfile, 'r') as hfile: + lines = hfile.read().split('\n') + if not lines[-1]: + lines = lines[:-1] + for y in range(len(lines)): + line = lines[y] + currip, names, comment = partitionhostsline(line) + if currip in self.byip: + x = self.byip[currip] + if self.sourcelines[x] is None: + # have already consumed this enntry + continue + self.targlines.append(self.sourcelines[x]) + self.sourcelines[x] = None + continue + for name in names: + if name in self.byname: + x = self.byname[name] + if self.sourcelines[x] is None: + break + self.targlines.append(self.sourcelines[x]) + self.sourcelines[x] = None 
+ break + else: + self.targlines.append(line) + + def write_out(self, targetfile): + while not self.targlines[-1]: + self.targlines = self.targlines[:-1] + if not self.targlines: + break + while not self.sourcelines[-1]: + self.sourcelines = self.sourcelines[:-1] + if not self.sourcelines: + break + with open(targetfile, 'w') as hosts: + for line in self.targlines: + hosts.write(line + '\n') + for line in self.sourcelines: + if line is not None: + hosts.write(line + '\n') + + +class CredMerger: + def __init__(self): + try: + with open('/etc/login.defs', 'r') as ldefs: + defs = ldefs.read().split('\n') + except FileNotFoundError: + defs = [] + lkup = {} + self.discardnames = {} + self.shadowednames = {} + for line in defs: + try: + line = line[:line.index('#')] + except ValueError: + pass + keyval = line.split() + if len(keyval) < 2: + continue + lkup[keyval[0]] = keyval[1] + self.uidmin = int(lkup.get('UID_MIN', 1000)) + self.uidmax = int(lkup.get('UID_MAX', 60000)) + self.gidmin = int(lkup.get('GID_MIN', 1000)) + self.gidmax = int(lkup.get('GID_MAX', 60000)) + self.shadowlines = None + + def read_passwd(self, source, targfile=False): + self.read_generic(source, self.uidmin, self.uidmax, targfile) + + def read_group(self, source, targfile=False): + self.read_generic(source, self.gidmin, self.gidmax, targfile) + + def read_generic(self, source, minid, maxid, targfile): + if targfile: + self.targdata = [] + else: + self.sourcedata = [] + with open(source, 'r') as inputfile: + for line in inputfile.read().split('\n'): + try: + name, _, uid, _ = line.split(':', 3) + uid = int(uid) + except ValueError: + continue + if targfile: + if uid < minid or uid > maxid: + self.targdata.append(line) + else: + self.discardnames[name] = 1 + else: + if name[0] in ('+', '#', '@'): + self.sourcedata.append(line) + elif uid >= minid and uid <= maxid: + self.sourcedata.append(line) + + def read_shadow(self, source): + self.shadowlines = [] + try: + with open(source, 'r') as inshadow: + 
for line in inshadow.read().split('\n'): + try: + name, _ = line.split(':' , 1) + except ValueError: + continue + if name in self.discardnames: + continue + self.shadowednames[name] = 1 + self.shadowlines.append(line) + except FileNotFoundError: + return + + def write_out(self, outfile): + with open(outfile, 'w') as targ: + for line in self.targdata: + targ.write(line + '\n') + for line in self.sourcedata: + targ.write(line + '\n') + if outfile == '/etc/passwd': + if self.shadowlines is None: + self.read_shadow('/etc/shadow') + with open('/etc/shadow', 'w') as shadout: + for line in self.shadowlines: + shadout.write(line + '\n') + for line in self.sourcedata: + name, _ = line.split(':', 1) + if name[0] in ('+', '#', '@'): + continue + if name in self.shadowednames: + continue + shadout.write(name + ':!:::::::\n') + if outfile == '/etc/group': + if self.shadowlines is None: + self.read_shadow('/etc/gshadow') + with open('/etc/gshadow', 'w') as shadout: + for line in self.shadowlines: + shadout.write(line + '\n') + for line in self.sourcedata: + name, _ = line.split(':' , 1) + if name in self.shadowednames: + continue + shadout.write(name + ':!::\n') + +def appendonce(basepath, filename): + with open(filename, 'rb') as filehdl: + thedata = filehdl.read() + targname = filename.replace(basepath, '') + try: + with open(targname, 'rb') as filehdl: + targdata = filehdl.read() + except IOError: + targdata = b'' + if thedata in targdata: + return + with open(targname, 'ab') as targhdl: + targhdl.write(thedata) + +def synchronize(): + tmpdir = tempfile.mkdtemp() + appendoncedir = tempfile.mkdtemp() + try: + ac = apiclient.HTTPSClient() + myips = [] + ipaddrs = subprocess.check_output(['ip', '-br', 'a']).split(b'\n') + for line in ipaddrs: + isa = line.split() + if len(isa) < 3 or isa[1] != b'UP': + continue + for addr in isa[2:]: + if addr.startswith(b'fe80::') or addr.startswith(b'169.254'): + continue + addr = addr.split(b'/')[0] + if not isinstance(addr, str): + addr = 
addr.decode('utf8') + myips.append(addr) + data = json.dumps({'merge': tmpdir, 'appendonce': appendoncedir, 'myips': myips}) + status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles', data) + if status == 202: + lastrsp = '' + while status != 204: + status, rsp = ac.grab_url_with_status('/confluent-api/self/remotesyncfiles') + if not isinstance(rsp, str): + rsp = rsp.decode('utf8') + if status == 200: + lastrsp = rsp + pendpasswd = os.path.join(tmpdir, 'etc/passwd') + if os.path.exists(pendpasswd): + cm = CredMerger() + cm.read_passwd(pendpasswd, targfile=False) + cm.read_passwd('/etc/passwd', targfile=True) + cm.write_out('/etc/passwd') + pendgroup = os.path.join(tmpdir, 'etc/group') + if os.path.exists(pendgroup): + cm = CredMerger() + cm.read_group(pendgroup, targfile=False) + cm.read_group('/etc/group', targfile=True) + cm.write_out('/etc/group') + pendhosts = os.path.join(tmpdir, 'etc/hosts') + if os.path.exists(pendhosts): + cm = HostMerger() + cm.read_source(pendhosts) + cm.read_target('/etc/hosts') + cm.write_out('/etc/hosts') + for dirn in os.walk(appendoncedir): + for filen in dirn[2]: + appendonce(appendoncedir, os.path.join(dirn[0], filen)) + if lastrsp: + lastrsp = json.loads(lastrsp) + opts = lastrsp.get('options', {}) + for fname in opts: + uid = -1 + gid = -1 + for opt in opts[fname]: + if opt == 'owner': + try: + uid = pwd.getpwnam(opts[fname][opt]['name']).pw_uid + except KeyError: + uid = opts[fname][opt]['id'] + elif opt == 'group': + try: + gid = grp.getgrnam(opts[fname][opt]['name']).gr_gid + except KeyError: + gid = opts[fname][opt]['id'] + elif opt == 'permissions': + os.chmod(fname, int(opts[fname][opt], 8)) + if uid != -1 or gid != -1: + os.chown(fname, uid, gid) + finally: + shutil.rmtree(tmpdir) + shutil.rmtree(appendoncedir) + + +if __name__ == '__main__': + synchronize() diff --git a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/post.sh b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/post.sh 
index 6c99735c..7b970285 100755 --- a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/post.sh +++ b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/post.sh @@ -74,8 +74,11 @@ if [ -e /sys/firmware/efi ]; then fi fi cat /target/etc/confluent/tls/*.pem > /target/etc/confluent/ca.pem +cat /target/etc/confluent/tls/*.pem > /target/usr/local/share/ca-certificates/confluent.crt cat /target/etc/confluent/tls/*.pem > /etc/confluent/ca.pem +chroot /target update-ca-certificates chroot /target bash -c "source /etc/confluent/functions; run_remote_python syncfileclient" +chroot /target bash -c "source /etc/confluent/functions; run_remote_python confignet" chroot /target bash -c "source /etc/confluent/functions; run_remote_parts post.d" source /target/etc/confluent/functions diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh index c4fefc3d..7b970285 100755 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh @@ -78,6 +78,7 @@ cat /target/etc/confluent/tls/*.pem > /target/usr/local/share/ca-certificates/co cat /target/etc/confluent/tls/*.pem > /etc/confluent/ca.pem chroot /target update-ca-certificates chroot /target bash -c "source /etc/confluent/functions; run_remote_python syncfileclient" +chroot /target bash -c "source /etc/confluent/functions; run_remote_python confignet" chroot /target bash -c "source /etc/confluent/functions; run_remote_parts post.d" source /target/etc/confluent/functions From 89e573207cd52f846f44f5159f264935c71eb15b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 1 Sep 2023 16:55:01 -0400 Subject: [PATCH 033/126] Remove irrelevant command from ubuntu cloning firstboot --- .../ubuntu20.04-diskless/profiles/default/scripts/firstboot.sh | 2 -- 1 file changed, 2 deletions(-) diff --git 
a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/firstboot.sh b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/firstboot.sh index 68e212fd..97697312 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/firstboot.sh +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/firstboot.sh @@ -24,8 +24,6 @@ done if [ ! -f /etc/confluent/firstboot.ran ]; then touch /etc/confluent/firstboot.ran - cat /etc/confluent/tls/*.pem >> /etc/pki/tls/certs/ca-bundle.crt - run_remote firstboot.custom # Firstboot scripts may be placed into firstboot.d, e.g. firstboot.d/01-firstaction.sh, firstboot.d/02-secondaction.sh run_remote_parts firstboot.d From 3cacbf283ace3ab2f6b70ded7ecc8b9f23463609 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 7 Sep 2023 10:09:19 -0400 Subject: [PATCH 034/126] Fix cloning to NVME boot The partition device names have a different scheme for nvme v. other devices. --- .../profiles/default/scripts/image2disk.py | 13 ++++++++++--- .../profiles/default/scripts/image2disk.py | 12 +++++++++--- .../profiles/default/scripts/image2disk.py | 12 +++++++++--- .../profiles/default/scripts/image2disk.py | 13 ++++++++++--- 4 files changed, 38 insertions(+), 12 deletions(-) diff --git a/confluent_osdeploy/el7-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el7-diskless/profiles/default/scripts/image2disk.py index fa378632..08d48a9c 100644 --- a/confluent_osdeploy/el7-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el7-diskless/profiles/default/scripts/image2disk.py @@ -11,6 +11,13 @@ import struct import sys import subprocess +def get_partname(devname, idx): + if devname[-1] in '0123456789': + return '{}p{}'.format(devname, idx) + else: + return '{}{}'.format(devname, idx) + + def get_next_part_meta(img, imgsize): if img.tell() == imgsize: return None @@ -258,7 +265,7 @@ def install_to_disk(imgpath): if end > sectors: end = sectors 
parted.run('mkpart primary {}s {}s'.format(curroffset, end)) - vol['targetdisk'] = instdisk + '{0}'.format(volidx) + vol['targetdisk'] = get_partname(instdisk , volidx) curroffset += size + 1 if not lvmvols: if swapsize: @@ -268,10 +275,10 @@ def install_to_disk(imgpath): if end > sectors: end = sectors parted.run('mkpart swap {}s {}s'.format(curroffset, end)) - subprocess.check_call(['mkswap', instdisk + '{}'.format(volidx + 1)]) + subprocess.check_call(['mkswap', get_partname(instdisk, volidx + 1)]) else: parted.run('mkpart lvm {}s 100%'.format(curroffset)) - lvmpart = instdisk + '{}'.format(volidx + 1) + lvmpart = get_partname(instdisk, volidx + 1) subprocess.check_call(['pvcreate', '-ff', '-y', lvmpart]) subprocess.check_call(['vgcreate', 'localstorage', lvmpart]) vginfo = subprocess.check_output(['vgdisplay', 'localstorage', '--units', 'b']).decode('utf8') diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py index 0f0a6745..92facb4b 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py @@ -13,6 +13,12 @@ import subprocess bootuuid = None +def get_partname(devname, idx): + if devname[-1] in '0123456789': + return '{}p{}'.format(devname, idx) + else: + return '{}{}'.format(devname, idx) + def get_next_part_meta(img, imgsize): if img.tell() == imgsize: return None @@ -295,7 +301,7 @@ def install_to_disk(imgpath): if end > sectors: end = sectors parted.run('mkpart primary {}s {}s'.format(curroffset, end)) - vol['targetdisk'] = instdisk + '{0}'.format(volidx) + vol['targetdisk'] = get_partname(instdisk, volidx) curroffset += size + 1 if not lvmvols: if swapsize: @@ -305,10 +311,10 @@ def install_to_disk(imgpath): if end > sectors: end = sectors parted.run('mkpart swap {}s {}s'.format(curroffset, end)) - subprocess.check_call(['mkswap', instdisk + 
'{}'.format(volidx + 1)]) + subprocess.check_call(['mkswap', get_partname(instdisk, volidx + 1)]) else: parted.run('mkpart lvm {}s 100%'.format(curroffset)) - lvmpart = instdisk + '{}'.format(volidx + 1) + lvmpart = get_partname(instdisk, volidx + 1) subprocess.check_call(['pvcreate', '-ff', '-y', lvmpart]) subprocess.check_call(['vgcreate', 'localstorage', lvmpart]) vginfo = subprocess.check_output(['vgdisplay', 'localstorage', '--units', 'b']).decode('utf8') diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py index 0f0a6745..92facb4b 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py @@ -13,6 +13,12 @@ import subprocess bootuuid = None +def get_partname(devname, idx): + if devname[-1] in '0123456789': + return '{}p{}'.format(devname, idx) + else: + return '{}{}'.format(devname, idx) + def get_next_part_meta(img, imgsize): if img.tell() == imgsize: return None @@ -295,7 +301,7 @@ def install_to_disk(imgpath): if end > sectors: end = sectors parted.run('mkpart primary {}s {}s'.format(curroffset, end)) - vol['targetdisk'] = instdisk + '{0}'.format(volidx) + vol['targetdisk'] = get_partname(instdisk, volidx) curroffset += size + 1 if not lvmvols: if swapsize: @@ -305,10 +311,10 @@ def install_to_disk(imgpath): if end > sectors: end = sectors parted.run('mkpart swap {}s {}s'.format(curroffset, end)) - subprocess.check_call(['mkswap', instdisk + '{}'.format(volidx + 1)]) + subprocess.check_call(['mkswap', get_partname(instdisk, volidx + 1)]) else: parted.run('mkpart lvm {}s 100%'.format(curroffset)) - lvmpart = instdisk + '{}'.format(volidx + 1) + lvmpart = get_partname(instdisk, volidx + 1) subprocess.check_call(['pvcreate', '-ff', '-y', lvmpart]) subprocess.check_call(['vgcreate', 'localstorage', lvmpart]) vginfo = subprocess.check_output(['vgdisplay', 
'localstorage', '--units', 'b']).decode('utf8') diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py index 7371dcf1..5d15e3d4 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py @@ -13,6 +13,13 @@ import subprocess bootuuid = None + +def get_partname(devname, idx): + if devname[-1] in '0123456789': + return '{}p{}'.format(devname, idx) + else: + return '{}{}'.format(devname, idx) + def get_next_part_meta(img, imgsize): if img.tell() == imgsize: return None @@ -292,7 +299,7 @@ def install_to_disk(imgpath): if end > sectors: end = sectors parted.run('mkpart primary {}s {}s'.format(curroffset, end)) - vol['targetdisk'] = instdisk + '{0}'.format(volidx) + vol['targetdisk'] = get_partname(instdisk, volidx) curroffset += size + 1 if not lvmvols: if swapsize: @@ -302,10 +309,10 @@ def install_to_disk(imgpath): if end > sectors: end = sectors parted.run('mkpart swap {}s {}s'.format(curroffset, end)) - subprocess.check_call(['mkswap', instdisk + '{}'.format(volidx + 1)]) + subprocess.check_call(['mkswap', get_partname(instdisk, volidx + 1)]) else: parted.run('mkpart lvm {}s 100%'.format(curroffset)) - lvmpart = instdisk + '{}'.format(volidx + 1) + lvmpart = get_partname(instdisk, volidx + 1) subprocess.check_call(['pvcreate', '-ff', '-y', lvmpart]) subprocess.check_call(['vgcreate', 'localstorage', lvmpart]) vginfo = subprocess.check_output(['vgdisplay', 'localstorage', '--units', 'b']).decode('utf8') From 8ca1f80ef6f523d722f2f6cb3bc6fa63f2ae187d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 7 Sep 2023 14:36:56 -0400 Subject: [PATCH 035/126] Fix implicit nic in confignet If the implicit IP is not in any of the attribute groups of net, then auto-vivify from the normal place. 
--- confluent_server/confluent/netutil.py | 35 +++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/confluent_server/confluent/netutil.py b/confluent_server/confluent/netutil.py index dcce1544..ebf88b1d 100644 --- a/confluent_server/confluent/netutil.py +++ b/confluent_server/confluent/netutil.py @@ -332,6 +332,18 @@ def get_full_net_config(configmanager, node, serverip=None): if serverip: myaddrs = get_addresses_by_serverip(serverip) nm = NetManager(myaddrs, node, configmanager) + nnc = get_nic_config(configmanager, node, serverip=serverip) + defaultnic = {} + if nnc.get('ipv4_address', None): + defaultnic['ipv4_address'] = '{}/{}'.format(nnc['ipv4_address'], nnc['prefix']) + if nnc.get('ipv4_gateway', None): + defaultnic['ipv4_gateway'] = nnc['ipv4_gateway'] + if nnc.get('ipv4_method', None): + defaultnic['ipv4_method'] = nnc['ipv4_method'] + if nnc.get('ipv6_address', None): + defaultnic['ipv6_address'] = '{}/{}'.format(nnc['ipv6_address'], nnc['ipv6_prefix']) + if nnc.get('ipv6_method', None): + defaultnic['ipv6_method'] = nnc['ipv6_method'] if None in attribs: nm.process_attribs(None, attribs[None]) del attribs[None] @@ -345,6 +357,29 @@ def get_full_net_config(configmanager, node, serverip=None): retattrs['extranets'] = nm.myattribs for attri in retattrs['extranets']: add_netmask(retattrs['extranets'][attri]) + if retattrs['extranets'][attri].get('ipv4_address', None) == defaultnic.get('ipv4_address', 'NOPE'): + defaultnic = {} + if retattrs['extranets'][attri].get('ipv6_address', None) == defaultnic.get('ipv6_address', 'NOPE'): + defaultnic = {} + if 'default' not in retattrs and defaultnic: + retattrs['default'] = defaultnic + add_netmask(retattrs['default']) + ipv4addr = defaultnic.get('ipv4_address', None) + if '/' in ipv4addr: + ipv4bytes = socket.inet_pton(socket.AF_INET, ipv4addr.split('/')[0]) + for addr in nm.myaddrs: + if addr[0] != socket.AF_INET: + continue + if ipn_on_same_subnet(addr[0], addr[1], ipv4bytes, addr[2]): + 
defaultnic['current_nic'] = True + ipv6addr = defaultnic.get('ipv6_address', None) + if '/' in ipv6addr: + ipv6bytes = socket.inet_pton(socket.AF_INET6, ipv6addr.split('/')[0]) + for addr in nm.myaddrs: + if addr[0] != socket.AF_INET6: + continue + if ipn_on_same_subnet(addr[0], addr[1], ipv6bytes, addr[2]): + defaultnic['current_nic'] = True return retattrs From 691d92f735cd82a4ab73a36b418ff823248351f1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 7 Sep 2023 14:41:16 -0400 Subject: [PATCH 036/126] Avoid calling implicit nic config if nowhere to put it If 'None' attributes are in use, we'd have nowhere to stick implicit configuration anyway. --- confluent_server/confluent/netutil.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/confluent_server/confluent/netutil.py b/confluent_server/confluent/netutil.py index ebf88b1d..66ed2169 100644 --- a/confluent_server/confluent/netutil.py +++ b/confluent_server/confluent/netutil.py @@ -332,18 +332,7 @@ def get_full_net_config(configmanager, node, serverip=None): if serverip: myaddrs = get_addresses_by_serverip(serverip) nm = NetManager(myaddrs, node, configmanager) - nnc = get_nic_config(configmanager, node, serverip=serverip) defaultnic = {} - if nnc.get('ipv4_address', None): - defaultnic['ipv4_address'] = '{}/{}'.format(nnc['ipv4_address'], nnc['prefix']) - if nnc.get('ipv4_gateway', None): - defaultnic['ipv4_gateway'] = nnc['ipv4_gateway'] - if nnc.get('ipv4_method', None): - defaultnic['ipv4_method'] = nnc['ipv4_method'] - if nnc.get('ipv6_address', None): - defaultnic['ipv6_address'] = '{}/{}'.format(nnc['ipv6_address'], nnc['ipv6_prefix']) - if nnc.get('ipv6_method', None): - defaultnic['ipv6_method'] = nnc['ipv6_method'] if None in attribs: nm.process_attribs(None, attribs[None]) del attribs[None] @@ -354,6 +343,18 @@ def get_full_net_config(configmanager, node, serverip=None): retattrs['default'] = nm.myattribs[None] add_netmask(retattrs['default']) del
nm.myattribs[None] + else: + nnc = get_nic_config(configmanager, node, serverip=serverip) + if nnc.get('ipv4_address', None): + defaultnic['ipv4_address'] = '{}/{}'.format(nnc['ipv4_address'], nnc['prefix']) + if nnc.get('ipv4_gateway', None): + defaultnic['ipv4_gateway'] = nnc['ipv4_gateway'] + if nnc.get('ipv4_method', None): + defaultnic['ipv4_method'] = nnc['ipv4_method'] + if nnc.get('ipv6_address', None): + defaultnic['ipv6_address'] = '{}/{}'.format(nnc['ipv6_address'], nnc['ipv6_prefix']) + if nnc.get('ipv6_method', None): + defaultnic['ipv6_method'] = nnc['ipv6_method'] retattrs['extranets'] = nm.myattribs for attri in retattrs['extranets']: add_netmask(retattrs['extranets'][attri]) @@ -361,7 +362,7 @@ def get_full_net_config(configmanager, node, serverip=None): defaultnic = {} if retattrs['extranets'][attri].get('ipv6_address', None) == defaultnic.get('ipv6_address', 'NOPE'): defaultnic = {} - if 'default' not in retattrs and defaultnic: + if defaultnic: retattrs['default'] = defaultnic add_netmask(retattrs['default']) ipv4addr = defaultnic.get('ipv4_address', None) From fe78034eaa880de83f1857b02f21b96d3d9a9c4a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 7 Sep 2023 14:56:07 -0400 Subject: [PATCH 037/126] Add '-y' to imgutil build Allow non-interactive imgutil build. 
--- imgutil/imgutil | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/imgutil/imgutil b/imgutil/imgutil index b683b1e5..73005c39 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -437,6 +437,7 @@ def get_mydir(oscategory): class OsHandler(object): def __init__(self, name, version, arch, args): self.name = name + self._interactive = True self.version = version self.arch = arch self.sourcepath = None @@ -464,6 +465,9 @@ class OsHandler(object): except AttributeError: self.addrepos = [] + def set_interactive(self, shouldbeinteractive): + self._interactive = shouldbeinteractive + def get_json(self): odata = [self.oscategory, self.version, self.arch, self.name] for idx in range(len(odata)): @@ -577,7 +581,10 @@ class SuseHandler(OsHandler): cmd = ['chmod', 'a+x'] cmd.extend(glob.glob(os.path.join(targdir, '*'))) subprocess.check_call(cmd) - subprocess.check_call(['zypper', '-R', self.targpath, 'install'] + self.zyppargs) + if self._interactive: + subprocess.check_call(['zypper', '-R', self.targpath, 'install'] + self.zyppargs) + else: + subprocess.check_call(['zypper', '-n', '-R', self.targpath, 'install'] + self.zyppargs) os.symlink('/usr/lib/systemd/system/sshd.service', os.path.join(self.targpath, 'etc/systemd/system/multi-user.target.wants/sshd.service')) if os.path.exists(os.path.join(self.targpath, 'sbin/mkinitrd')): args.cmd = ['mkinitrd'] @@ -625,7 +632,6 @@ class ElHandler(OsHandler): self.yumargs = [] super().__init__(name, version, arch, args) - def add_pkglists(self): self.yumargs.extend(self.list_packages()) @@ -657,7 +663,10 @@ class ElHandler(OsHandler): cmd = ['chmod', 'a+x'] cmd.extend(glob.glob(os.path.join(targdir, '*'))) subprocess.check_call(cmd) - subprocess.check_call(['yum'] + self.yumargs) + if self._interactive: + subprocess.check_call(['yum'] + self.yumargs) + else: + subprocess.check_call(['yum', '-y'] + self.yumargs) with open('/proc/mounts') as mountinfo: for line in mountinfo.readlines(): if 
line.startswith('selinuxfs '): @@ -794,6 +803,7 @@ def main(): buildp.add_argument('-a', '--addpackagelist', action='append', default=[], help='A list of additional packages to include, may be specified multiple times') buildp.add_argument('-s', '--source', help='Directory to pull installation from, typically a subdirectory of /var/lib/confluent/distributions. By default, the repositories for the build system are used.') + buildp.add_argument('-y', '--non-interactive', help='Avoid prompting for confirmation', action='store_true') buildp.add_argument('-v', '--volume', help='Directory to make available in the build environment. -v / will ' 'cause it to be mounted in image as /run/external/, -v /:/run/root ' @@ -1128,6 +1138,8 @@ def build_root(args): sys.stderr.write( 'Unable to recognize build system os\n') sys.exit(1) + if args.non_interactive: + oshandler.set_interactive(True) oshandler.set_target(args.scratchdir) oshandler.add_pkglists() for dirname in ('proc', 'sys', 'dev', 'run'): From 8de6f4356d7b3eacd91f1aef94337dfab466a498 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 7 Sep 2023 15:12:43 -0400 Subject: [PATCH 038/126] Fix sense of new flag Accidentally made flag exactly opposite of intended --- imgutil/imgutil | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imgutil/imgutil b/imgutil/imgutil index 73005c39..39c40024 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -1139,7 +1139,7 @@ def build_root(args): 'Unable to recognize build system os\n') sys.exit(1) if args.non_interactive: - oshandler.set_interactive(True) + oshandler.set_interactive(False) oshandler.set_target(args.scratchdir) oshandler.add_pkglists() for dirname in ('proc', 'sys', 'dev', 'run'): From 9441221150aa0aa085f5c0b882828bb4a6c36ca9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 8 Sep 2023 11:30:57 -0400 Subject: [PATCH 039/126] Have cooltera plugin adapt As new sensors appear, be more adaptive to continue tracking existing sensors.
--- .../confluent/plugins/hardwaremanagement/cooltera.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/cooltera.py b/confluent_server/confluent/plugins/hardwaremanagement/cooltera.py index 1b89271e..c6e4b070 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/cooltera.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/cooltera.py @@ -210,10 +210,12 @@ def xml2stateinfo(statdata): stateinfo = [] sensornames = sorted([x.tag for x in statdata]) themodel = None - for model in sensorsbymodel: - if sensorsbymodel[model] == sensornames: + for model in sorted(sensorsbymodel): + if all([x in sensornames for x in sensorsbymodel[model]]): themodel = model break + else: + print(repr(sensornames)) thesensors = _thesensors[themodel] #['mode', 't1', 't2a', 't2b', 't2c', 't2', 't5', 't3', 't4', 'dw', 't3', 'rh', 'setpoint', 'secflow', 'primflow', 'ps1', 'ps1a', 'ps1b', 'ps2', 'ps3', 'ps4', 'ps5a', 'ps5b', 'ps5c', 'sdp', 'valve', 'valve2', 'pumpspeed1', 'pumpspeed2', 'pumpspeed3', 'alarms', 'dt', 'p3state', 'duty'] for tagname in thesensors: From 203dabfb0bf4ccd9836ebba153c2a30e6c2e54d1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 8 Sep 2023 16:56:58 -0400 Subject: [PATCH 040/126] Place nameservers on every NIC in netplan netplan, like others, makes the questionable choice to designate DNS as a NIC specific setting, despite not mapping well to a NIC. Since we model DNS like NTP, a global, just repeat the DNS config for every interface. This redundancy is fine in testing multiple interfaces. 
--- .../common/profile/scripts/confignet | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index d01a9e45..dec1808d 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -112,9 +112,10 @@ def get_interface_name(iname, settings): return None class NetplanManager(object): - def __init__(self): + def __init__(self, deploycfg): self.cfgbydev = {} self.read_connections() + self.deploycfg = deploycfg def read_connections(self): for plan in glob.glob('/etc/netplan/*.y*ml'): @@ -174,6 +175,19 @@ class NetplanManager(object): else: needcfgwrite = True cfgroutes.append({'via': gwaddr, 'to': 'default'}) + dnsips = self.deploycfg.get('nameservers', []) + dnsdomain = self.deploycfg.get('dnsdomain', '') + if dnsips: + currdnsips = self.getcfgarrpath([devname, 'nameservers', 'addresses']) + for dnsip in dnsips: + if dnsip not in currdnsips: + needcfgwrite = True + currdnsips.append(dnsip) + if dnsdomain: + currdnsdomain = self.getcfgarrpath([devname, 'nameservers', 'search']) + if dnsdomain not in currdnsdomain: + needcfgwrite = True + currdnsdomain.append(dnsdomain) if needcfgwrite: needcfgapply = True newcfg = {'network': {'version': 2, 'ethernets': {devname: self.cfgbydev[devname]}}} @@ -403,6 +417,7 @@ if __name__ == '__main__': myaddrs = apiclient.get_my_addresses() srvs, _ = apiclient.scan_confluents() doneidxs = set([]) + dc = None for srv in srvs: try: s = socket.create_connection((srv, 443)) @@ -422,6 +437,9 @@ if __name__ == '__main__': continue status, nc = apiclient.HTTPSClient(usejson=True, host=srv).grab_url_with_status('/confluent-api/self/netcfg') nc = json.loads(nc) + if not dc: + status, dc = apiclient.HTTPSClient(usejson=True, host=srv).grab_url_with_status('/confluent-api/self/deploycfg2') + dc = json.loads(dc) iname = 
get_interface_name(idxmap[curridx], nc.get('default', {})) if iname: for iname in iname.split(','): @@ -448,7 +466,7 @@ if __name__ == '__main__': del netname_to_interfaces['default'] rm_tmp_llas(tmpllas) if os.path.exists('/usr/sbin/netplan'): - nm = NetplanManager() + nm = NetplanManager(dc) if os.path.exists('/usr/bin/nmcli'): nm = NetworkManager(devtypes) elif os.path.exists('/usr/sbin/wicked'): From f82829aa0ce253f89ef7f81f0d5d543749849011 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 8 Sep 2023 17:10:27 -0400 Subject: [PATCH 041/126] Add dependency checking to imgutil capture This will more quickly indicate problems in a profile trying to capture. First iteration will address Ubuntu. --- imgutil/imgutil | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/imgutil/imgutil b/imgutil/imgutil index 39c40024..8f805ac5 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -196,7 +196,13 @@ def capture_remote(args): finfo = subprocess.check_output(['ssh', targ, 'python3', '/run/imgutil/capenv/imgutil', 'getfingerprint']).decode('utf8') finfo = json.loads(finfo) if finfo['oscategory'] not in ('el8', 'el9', 'ubuntu20.04', 'ubuntu22.04'): - raise Exception('Not yet supported for capture: ' + repr(finfo)) + sys.stderr.write('Not yet supported for capture: ' + repr(finfo) + '\n') + sys.exit(1) + unmet = finfo.get('unmetprereqs', []) + if unmet: + for cmd in unmet: + sys.stderr.write(cmd + '\n') + sys.exit(1) oscat = finfo['oscategory'] subprocess.check_call(['ssh', '-o', 'LogLevel=QUIET', '-t', targ, 'python3', '/run/imgutil/capenv/imgutil', 'capturelocal']) utillib = __file__.replace('bin/imgutil', 'lib/imgutil') @@ -442,6 +448,7 @@ class OsHandler(object): self.arch = arch self.sourcepath = None self.osname = '{}-{}-{}'.format(name, version, arch) + self.captureprereqs = [] try: pkglist = args.packagelist except AttributeError: @@ -474,7 +481,7 @@ class OsHandler(object): if not isinstance(odata[idx], str): odata[idx] = 
odata[idx].decode('utf8') info = {'oscategory': odata[0], - 'version': odata[1], 'arch': odata[2], 'name': odata[3]} + 'version': odata[1], 'arch': odata[2], 'name': odata[3], 'unmetprereqs': self.captureprereqs} return json.dumps(info) def prep_root_premount(self, args): @@ -594,12 +601,21 @@ class SuseHandler(OsHandler): class DebHandler(OsHandler): - def __init__(self, name, version, arch, args, codename): + def __init__(self, name, version, arch, args, codename, hostpath): self.includepkgs = [] self.targpath = None self.codename = codename self.oscategory = name + version super().__init__(name, version, arch, args) + needpkgs = [] + if not os.path.exists(os.path.join(hostpath, 'usr/bin/tpm2_getcap')): + needpkgs.append('tpm2-tools') + lfuses = glob.glob('/lib/*/libfuse.so.2') + if not lfuses: + needpkgs.append('libfuse2') + if needpkgs: + needapt = 'Missing packages needed in target for capture, to add required packages: apt install ' + ' '.join(needpkgs) + self.captureprereqs.append(needapt) def add_pkglists(self): self.includepkgs.extend(self.list_packages()) @@ -1082,7 +1098,7 @@ def fingerprint_host_deb(args, hostpath='/'): except IOError: pass if osname: - return DebHandler(osname, vers, os.uname().machine, args, codename) + return DebHandler(osname, vers, os.uname().machine, args, codename, hostpath) def fingerprint_host_suse(args, hostpath='/'): From d7190c893ff1c2211dcb4594bba4c873afb94002 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 11 Sep 2023 13:11:17 -0400 Subject: [PATCH 042/126] Correct networkmanager entries for boot over infiniband When booting and infiniband comes up as the 'get started' nic, make the entry use correct type so as to avoid confusion later, particularly when confignet comes along to fixup the configuration. 
--- .../lib/dracut/hooks/cmdline/10-confluentdiskless.sh | 12 ++++++++---- .../lib/dracut/hooks/cmdline/10-confluentdiskless.sh | 11 +++++++---- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh b/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh index 31233c82..b2881e0b 100644 --- a/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh +++ b/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh @@ -189,8 +189,15 @@ cat > /run/NetworkManager/system-connections/$ifname.nmconnection << EOC EOC echo id=${ifname} >> /run/NetworkManager/system-connections/$ifname.nmconnection echo uuid=$(uuidgen) >> /run/NetworkManager/system-connections/$ifname.nmconnection +linktype=$(ip link |grep -A2 ${ifname}|tail -n 1|awk '{print $1}') +if [ "$linktype" = link/infiniband ]; then + linktype="infiniband" +else + linktype="ethernet" +fi +echo type=$linktype >> /run/NetworkManager/system-connections/$ifname.nmconnection + cat >> /run/NetworkManager/system-connections/$ifname.nmconnection << EOC -type=ethernet autoconnect-retries=1 EOC echo interface-name=$ifname >> /run/NetworkManager/system-connections/$ifname.nmconnection @@ -199,9 +206,6 @@ multi-connect=1 permissions= wait-device-timeout=60000 -[ethernet] -mac-address-blacklist= - EOC autoconfigmethod=$(grep ^ipv4_method: /etc/confluent/confluent.deploycfg |awk '{print $2}') auto6configmethod=$(grep ^ipv6_method: /etc/confluent/confluent.deploycfg |awk '{print $2}') diff --git a/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh b/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh index 686e14ce..4fca92cf 100644 --- a/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh 
+++ b/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh @@ -154,8 +154,14 @@ cat > /run/NetworkManager/system-connections/$ifname.nmconnection << EOC EOC echo id=${ifname} >> /run/NetworkManager/system-connections/$ifname.nmconnection echo uuid=$(uuidgen) >> /run/NetworkManager/system-connections/$ifname.nmconnection +linktype=$(ip link |grep -A2 ${ifname}|tail -n 1|awk '{print $1}') +if [ "$linktype" = link/infiniband ]; then + linktype="infiniband" +else + linktype="ethernet" +fi +echo type=$linktype >> /run/NetworkManager/system-connections/$ifname.nmconnection cat >> /run/NetworkManager/system-connections/$ifname.nmconnection << EOC -type=ethernet autoconnect-retries=1 EOC echo interface-name=$ifname >> /run/NetworkManager/system-connections/$ifname.nmconnection @@ -164,9 +170,6 @@ multi-connect=1 permissions= wait-device-timeout=60000 -[ethernet] -mac-address-blacklist= - EOC autoconfigmethod=$(grep ^ipv4_method: /etc/confluent/confluent.deploycfg |awk '{print $2}') auto6configmethod=$(grep ^ipv6_method: /etc/confluent/confluent.deploycfg |awk '{print $2}') From b77d8b1f210519c5361d5475d190941d9ac8dc11 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 12 Sep 2023 10:45:15 -0400 Subject: [PATCH 043/126] Make yaml import conditional Only Ubuntu requires it, but tends to have it. Other distributions do not tend to have it. 
--- imgutil/imgutil | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/imgutil/imgutil b/imgutil/imgutil index 8f805ac5..4e46ce0f 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -23,7 +23,10 @@ import subprocess import sys import tempfile import time -import yaml +try: + import yaml +except ImportError: + pass path = os.path.dirname(os.path.realpath(__file__)) path = os.path.realpath(os.path.join(path, '..', 'lib', 'python')) if path.startswith('/opt'): From 00eb9e3c9d949db56e82e80820469cb373816c97 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 12 Sep 2023 16:49:15 -0400 Subject: [PATCH 044/126] Fix full_net_config with missing address info --- confluent_server/confluent/netutil.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/netutil.py b/confluent_server/confluent/netutil.py index 66ed2169..37e8d198 100644 --- a/confluent_server/confluent/netutil.py +++ b/confluent_server/confluent/netutil.py @@ -366,7 +366,7 @@ def get_full_net_config(configmanager, node, serverip=None): retattrs['default'] = defaultnic add_netmask(retattrs['default']) ipv4addr = defaultnic.get('ipv4_address', None) - if '/' in ipv4addr: + if ipv4addr and '/' in ipv4addr: ipv4bytes = socket.inet_pton(socket.AF_INET, ipv4addr.split('/')[0]) for addr in nm.myaddrs: if addr[0] != socket.AF_INET: @@ -374,7 +374,7 @@ def get_full_net_config(configmanager, node, serverip=None): if ipn_on_same_subnet(addr[0], addr[1], ipv4bytes, addr[2]): defaultnic['current_nic'] = True ipv6addr = defaultnic.get('ipv6_address', None) - if '/' in ipv6addr: + if ipv6addr and '/' in ipv6addr: ipv6bytes = socket.inet_pton(socket.AF_INET6, ipv6addr.split('/')[0]) for addr in nm.myaddrs: if addr[0] != socket.AF_INET6: From e6b0b235e959a32a243a8d8a1c330fbd9cc370c7 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 12 Sep 2023 16:53:32 -0400 Subject: [PATCH 045/126] Fix image2disk for nvme clone targets --- 
.../profiles/default/scripts/image2disk.py | 12 +++++++++--- .../profiles/default/scripts/image2disk.py | 2 ++ .../profiles/default/scripts/image2disk.py | 2 ++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/confluent_osdeploy/el7-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el7-diskless/profiles/default/scripts/image2disk.py index 08d48a9c..8548cebd 100644 --- a/confluent_osdeploy/el7-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el7-diskless/profiles/default/scripts/image2disk.py @@ -60,10 +60,13 @@ class PartedRunner(): def __init__(self, disk): self.disk = disk - def run(self, command): + def run(self, command, check=True):: command = command.split() command = ['parted', '-a', 'optimal', '-s', self.disk] + command - return subprocess.check_output(command).decode('utf8') + if check: + return subprocess.check_output(command).decode('utf8') + else: + return subprocess.run(command, stdout=subprocess.PIPE).stdout.decode('utf8') def fixup(rootdir, vols): devbymount = {} @@ -173,6 +176,8 @@ def fixup(rootdir, vols): partnum = re.search('(\d+)$', targdev).group(1) targblock = re.search('(.*)\d+$', targdev).group(1) if targblock: + if targblock.endswith('p') and 'nvme' in targblock: + targblock = targblock[:-1] shimpath = subprocess.check_output(['find', os.path.join(rootdir, 'boot/efi'), '-name', 'shimx64.efi']).decode('utf8').strip() shimpath = shimpath.replace(rootdir, '/').replace('/boot/efi', '').replace('//', '/').replace('/', '\\') subprocess.check_call(['efibootmgr', '-c', '-d', targblock, '-l', shimpath, '--part', partnum]) @@ -231,7 +236,8 @@ def install_to_disk(imgpath): instdisk = diskin.read() instdisk = '/dev/' + instdisk parted = PartedRunner(instdisk) - dinfo = parted.run('unit s print') + # do this safer, unit s print might bomb + dinfo = parted.run('unit s print', check=False) dinfo = dinfo.split('\n') sectors = 0 sectorsize = 0 diff --git 
a/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py index 92facb4b..aaaca9d4 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/image2disk.py @@ -208,6 +208,8 @@ def fixup(rootdir, vols): partnum = re.search('(\d+)$', targdev).group(1) targblock = re.search('(.*)\d+$', targdev).group(1) if targblock: + if 'nvme' in targblock and targblock[-1] == 'p': + targblock = targblock[:-1] shimpath = subprocess.check_output(['find', os.path.join(rootdir, 'boot/efi'), '-name', 'shimx64.efi']).decode('utf8').strip() shimpath = shimpath.replace(rootdir, '/').replace('/boot/efi', '').replace('//', '/').replace('/', '\\') subprocess.check_call(['efibootmgr', '-c', '-d', targblock, '-l', shimpath, '--part', partnum]) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py index 92facb4b..7b312a93 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/image2disk.py @@ -208,6 +208,8 @@ def fixup(rootdir, vols): partnum = re.search('(\d+)$', targdev).group(1) targblock = re.search('(.*)\d+$', targdev).group(1) if targblock: + if targblock.endswith('p') and 'nvme' in targblock: + targblock = targblock[:-1] shimpath = subprocess.check_output(['find', os.path.join(rootdir, 'boot/efi'), '-name', 'shimx64.efi']).decode('utf8').strip() shimpath = shimpath.replace(rootdir, '/').replace('/boot/efi', '').replace('//', '/').replace('/', '\\') subprocess.check_call(['efibootmgr', '-c', '-d', targblock, '-l', shimpath, '--part', partnum]) From 50d5cead06e51742915ac32057e97a1d0fe498f2 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 12 Sep 2023 16:55:19 -0400 Subject: [PATCH 046/126] Add prereq checks to EL 
cloning --- imgutil/imgutil | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/imgutil/imgutil b/imgutil/imgutil index 4e46ce0f..5596ef6a 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -613,7 +613,7 @@ class DebHandler(OsHandler): needpkgs = [] if not os.path.exists(os.path.join(hostpath, 'usr/bin/tpm2_getcap')): needpkgs.append('tpm2-tools') - lfuses = glob.glob('/lib/*/libfuse.so.2') + lfuses = glob.glob(os.path.join(hostpath, '/lib/*/libfuse.so.2') if not lfuses: needpkgs.append('libfuse2') if needpkgs: @@ -650,6 +650,21 @@ class ElHandler(OsHandler): self.oscategory = 'el{0}'.format(version.split('.')[0]) self.yumargs = [] super().__init__(name, version, arch, args) + needpkgs = [] + if not os.path.exists(os.path.join(hostpath, 'usr/bin/tpm2_getcap')): + needpkgs.append('tpm2-tools') + lfuses = glob.glob(os.path.join(hostpath, '/usr/lib64/libfuse.so.2') + if not lfuses: + needpkgs.append('fuse-libs') + if not os.path.exists(os.path.join(hostpath, '/usr/bin/ipcalc')): + needpkgs.append('ipcalc') + if not os.path.exists(os.path.join(hostpath, 'usr/sbin/dhclient')): + needpkgs.append('dhcp-client') + if not os.path.exists(os.path.join(hostpath, 'usr/sbin/mount.nfs')): + needpkgs.append('nfs-utils') + if needpkgs: + needapt = 'Missing packages needed in target for capture, to add required packages: dnf install ' + ' '.join(needpkgs) + self.captureprereqs.append(needapt) def add_pkglists(self): self.yumargs.extend(self.list_packages()) From b75979f3ec4eb64ec9cf3f5fa302ab812c0bd7c1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 12 Sep 2023 16:59:53 -0400 Subject: [PATCH 047/126] Insulate confluent from fatal errors from discovery subscription errors --- confluent_server/confluent/discovery/core.py | 7 +++++-- confluent_server/confluent/main.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py index 
13b3aac0..5952529f 100644 --- a/confluent_server/confluent/discovery/core.py +++ b/confluent_server/confluent/discovery/core.py @@ -1597,7 +1597,10 @@ def remotescan(): mycfm = cfm.ConfigManager(None) myname = collective.get_myname() for remagent in get_subscriptions(): - affluent.renotify_me(remagent, mycfm, myname) + try: + affluent.renotify_me(remagent, mycfm, myname) + except Exception as e: + log.log({'error': 'Unexpected problem asking {} for discovery notifications'.format(remagent)}) def blocking_scan(): @@ -1637,7 +1640,7 @@ def start_autosense(): autosensors.add(eventlet.spawn(slp.snoop, safe_detected, slp)) #autosensors.add(eventlet.spawn(mdns.snoop, safe_detected, mdns)) autosensors.add(eventlet.spawn(pxe.snoop, safe_detected, pxe, get_node_guess_by_uuid)) - remotescan() + eventlet.spawn(remotescan) nodes_by_fprint = {} diff --git a/confluent_server/confluent/main.py b/confluent_server/confluent/main.py index f59bceb7..b49d8f56 100644 --- a/confluent_server/confluent/main.py +++ b/confluent_server/confluent/main.py @@ -326,7 +326,7 @@ def run(args): break except Exception: eventlet.sleep(0.5) - disco.start_detection() + eventlet.spawn_n(disco.start_detection) eventlet.sleep(1) consoleserver.start_console_sessions() while 1: From 74c6848a0bf8addf48f6c60aed9d1804a077be37 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 13 Sep 2023 09:59:03 -0400 Subject: [PATCH 048/126] Avoid redundant setting of known data Setting attributes can be a touch expensive, since there's a high risk of this being old news, check that discovery hasn't already set values before trying to set them again. 
--- confluent_server/confluent/discovery/core.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py index 5952529f..a81d79e7 100644 --- a/confluent_server/confluent/discovery/core.py +++ b/confluent_server/confluent/discovery/core.py @@ -1429,7 +1429,13 @@ def discover_node(cfg, handler, info, nodename, manual): newnodeattribs['pubkeys.tls_hardwaremanager'] = \ util.get_fingerprint(handler.https_cert, 'sha256') if newnodeattribs: - cfg.set_node_attributes({nodename: newnodeattribs}) + currattrs = cfg.get_node_attributes(nodename, newnodeattribs) + for checkattr in newnodeattribs: + checkval = currattrs.get(nodename, {}).get(checkattr, {}).get('value', None) + if checkval != newnodeattribs[checkattr]: + break + else: + cfg.set_node_attributes({nodename: newnodeattribs}) log.log({'info': 'Discovered {0} ({1})'.format(nodename, handler.devname)}) if nodeconfig: @@ -1508,7 +1514,13 @@ def do_pxe_discovery(cfg, handler, info, manual, nodename, policies): if info['hwaddr'] != oldhwaddr: attribs[newattrname] = info['hwaddr'] if attribs: - cfg.set_node_attributes({nodename: attribs}) + currattrs = cfg.get_node_attributes(nodename, attribs) + for checkattr in attribs: + checkval = currattrs.get(nodename, {}).get(checkattr, {}).get('value', None) + if checkval != attribs[checkattr]: + break + else: + cfg.set_node_attributes({nodename: attribs}) if info['uuid'] in known_pxe_uuids: return True if uuid_is_valid(info['uuid']): From 97ee8e2372afe922ea27d45bb3c54e158a5a9d16 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 13 Sep 2023 10:50:21 -0400 Subject: [PATCH 049/126] Correct the logic of duplicate discovery protection --- confluent_server/confluent/discovery/core.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py index
a81d79e7..e18aae0e 100644 --- a/confluent_server/confluent/discovery/core.py +++ b/confluent_server/confluent/discovery/core.py @@ -1433,9 +1433,8 @@ def discover_node(cfg, handler, info, nodename, manual): for checkattr in newnodeattribs: checkval = currattrs.get(nodename, {}).get(checkattr, {}).get('value', None) if checkval != newnodeattribs[checkattr]: + cfg.set_node_attributes({nodename: newnodeattribs}) break - else: - cfg.set_node_attributes({nodename: newnodeattribs}) log.log({'info': 'Discovered {0} ({1})'.format(nodename, handler.devname)}) if nodeconfig: @@ -1518,9 +1517,8 @@ def do_pxe_discovery(cfg, handler, info, manual, nodename, policies): for checkattr in attribs: checkval = currattrs.get(nodename, {}).get(checkattr, {}).get('value', None) if checkval != attribs[checkattr]: + cfg.set_node_attributes({nodename: attribs}) break - else: - cfg.set_node_attributes({nodename: attribs}) if info['uuid'] in known_pxe_uuids: return True if uuid_is_valid(info['uuid']): From df47c6d0fde8cef2abdaa4d8d270012795723d1c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 13 Sep 2023 17:03:05 -0400 Subject: [PATCH 050/126] Disable attribute notify during json restore This is guaranteed to be a lot of churn very quickly, disable it for now. 
--- .../confluent/config/configmanager.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index 5a392edd..50d09b1c 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -119,6 +119,7 @@ _cfgstore = None _pendingchangesets = {} _txcount = 0 _hasquorum = True +_ready = False _attraliases = { 'bmc': 'hardwaremanagement.manager', @@ -1273,6 +1274,7 @@ class ConfigManager(object): def __init__(self, tenant, decrypt=False, username=None): self.clientfiles = {} global _cfgstore + self.inrestore = False with _initlock: if _cfgstore is None: init() @@ -2089,6 +2091,10 @@ class ConfigManager(object): def _notif_attribwatchers(self, nodeattrs): if self.tenant not in self._attribwatchers: return + if self.inrestore: + # Do not stir up attribute watchers during a collective join or DB restore, + # it's too hectic of a time to react + return notifdata = {} attribwatchers = self._attribwatchers[self.tenant] for node in nodeattrs: @@ -2471,6 +2477,13 @@ class ConfigManager(object): #TODO: wait for synchronization to suceed/fail??) 
def _load_from_json(self, jsondata, sync=True): + self.inrestore = True + try: + _load_from_json_backend(self, jsondata, sync=True) + finally: + self.inrestore = False + + def _load_from_json_backend(self, jsondata, sync=True): """Load fresh configuration data from jsondata :param jsondata: String of jsondata @@ -2939,9 +2952,9 @@ def get_globals(): bkupglobals[globvar] = _cfgstore['globals'][globvar] return bkupglobals - def init(stateless=False): global _cfgstore + global _ready if stateless: _cfgstore = {} return @@ -2949,6 +2962,9 @@ def init(stateless=False): ConfigManager._read_from_path() except IOError: _cfgstore = {} + members = list(list_collective()) + if len(members) < 2: + _ready = True if __name__ == '__main__': From 20f02b5ef732d689b350c82a2316d4da26bf1962 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 15 Sep 2023 10:07:14 -0400 Subject: [PATCH 051/126] Avoid searching switches for foreign nodes Consult collective.manager to decide to skip consideration of a node, if that node shouldn't be managed anyway. This should avoid "cross-island" behavior for such environments. 
--- confluent_server/confluent/collective/manager.py | 6 ++++++ confluent_server/confluent/networking/macmap.py | 16 ++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/collective/manager.py b/confluent_server/confluent/collective/manager.py index 8668bc65..a4059bc5 100644 --- a/confluent_server/confluent/collective/manager.py +++ b/confluent_server/confluent/collective/manager.py @@ -259,6 +259,9 @@ def get_myname(): mycachedname[1] = time.time() return myname +def in_collective(): + return bool(list(cfm.list_collective())) + def handle_connection(connection, cert, request, local=False): global currentleader global retrythread @@ -815,6 +818,9 @@ def start_collective(): global follower global retrythread global initting + if initting: + # do not nest startup if startup backs up loops of starting collective + return initting = True retrythread = None try: diff --git a/confluent_server/confluent/networking/macmap.py b/confluent_server/confluent/networking/macmap.py index d1377dbf..cf6012c5 100644 --- a/confluent_server/confluent/networking/macmap.py +++ b/confluent_server/confluent/networking/macmap.py @@ -49,10 +49,11 @@ import eventlet.green.select as select import eventlet.green.socket as socket - +import confluent.collective.manager as collective import confluent.exceptions as exc import confluent.log as log import confluent.messages as msg +import confluent.noderange as noderange import confluent.util as util from eventlet.greenpool import GreenPool import eventlet.green.subprocess as subprocess @@ -502,10 +503,21 @@ def _full_updatemacmap(configmanager): 'Network topology not available to tenants') # here's a list of switches... 
need to add nodes that are switches nodelocations = configmanager.get_node_attributes( - configmanager.list_nodes(), ('type', 'net*.switch', 'net*.switchport')) + configmanager.list_nodes(), ('type', 'collective.managercandidates', 'net*.switch', 'net*.switchport')) switches = set([]) + incollective = collective.in_collective() + if incollective: + mycollectivename = collective.get_myname() for node in nodelocations: cfg = nodelocations[node] + if incollective: + candmgrs = cfg.get('collective.managercandidates', {}).get('value', None) + if candmgrs: + candmgrs = noderange.NodeRange(candmgrs, configmanager).nodes + if mycollectivename not in candmgrs: + # do not think about trying to find nodes that we aren't possibly + # supposed to be a manager for in a collective + continue if cfg.get('type', {}).get('value', None) == 'switch': switches.add(node) for attr in cfg: From 533244458d56b4b1ee83cf05d8333babd99f6b17 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 15 Sep 2023 10:37:51 -0400 Subject: [PATCH 052/126] Do not count as 'initting' until collective starts. --- confluent_server/confluent/collective/manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/collective/manager.py b/confluent_server/confluent/collective/manager.py index a4059bc5..80c241b8 100644 --- a/confluent_server/confluent/collective/manager.py +++ b/confluent_server/confluent/collective/manager.py @@ -43,7 +43,7 @@ currentleader = None follower = None retrythread = None failovercheck = None -initting = True +initting = False reassimilate = None class ContextBool(object): From 4952e87309779b9161f7bdf2004733c7d1e2421a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 15 Sep 2023 10:52:13 -0400 Subject: [PATCH 053/126] Undo collective manager changes Abort attempt to avoid duplicate startups, it was incorrect. 
--- confluent_server/confluent/collective/manager.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/confluent_server/confluent/collective/manager.py b/confluent_server/confluent/collective/manager.py index 80c241b8..8d98caa0 100644 --- a/confluent_server/confluent/collective/manager.py +++ b/confluent_server/confluent/collective/manager.py @@ -43,7 +43,7 @@ currentleader = None follower = None retrythread = None failovercheck = None -initting = False +initting = True reassimilate = None class ContextBool(object): @@ -818,9 +818,6 @@ def start_collective(): global follower global retrythread global initting - if initting: - # do not nest startup if startup backs up loops of starting collective - return initting = True retrythread = None try: From c0629fcce57538d0f83e257d5bc99157eb25e751 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 15 Sep 2023 11:41:12 -0400 Subject: [PATCH 054/126] Fix invocation of json restore change --- confluent_server/confluent/config/configmanager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index 50d09b1c..1ae5e579 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -2479,7 +2479,7 @@ class ConfigManager(object): def _load_from_json(self, jsondata, sync=True): self.inrestore = True try: - _load_from_json_backend(self, jsondata, sync=True) + self._load_from_json_backend(jsondata, sync=True) finally: self.inrestore = False From f2f25fe912ec6c606be40249a55be792495ab5be Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 15 Sep 2023 15:25:26 -0400 Subject: [PATCH 055/126] Implement ready tracking When going through the dramatic scenario of initializing collective, take _ready down so that other code can pause operation appropriately. 
--- confluent_server/confluent/config/configmanager.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index 1ae5e579..54888dd1 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -831,6 +831,9 @@ _oldcfgstore = None _oldtxcount = 0 +def config_is_ready(): + return _ready + def rollback_clear(): global _cfgstore global _txcount @@ -848,6 +851,8 @@ def clear_configuration(): global _txcount global _oldcfgstore global _oldtxcount + global _ready + _ready = False stop_leading() stop_following() _oldcfgstore = _cfgstore @@ -858,6 +863,7 @@ def clear_configuration(): def commit_clear(): global _oldtxcount global _oldcfgstore + global _ready # first, copy over old non-key globals, as those are # currently defined as local to each collective member # currently just 'autosense' which is intended to be active @@ -877,6 +883,7 @@ def commit_clear(): pass ConfigManager.wait_for_sync(True) ConfigManager._bg_sync_to_file() + ready = True cfgleader = None From 94b85597776aa4a4eea8f41c8b8f89898edca730 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 15 Sep 2023 15:28:16 -0400 Subject: [PATCH 056/126] Declare ready on becoming leader Provide for leader scenario to correctly flag configmanager as ready. 
--- confluent_server/confluent/collective/manager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_server/confluent/collective/manager.py b/confluent_server/confluent/collective/manager.py index 8d98caa0..2519cc39 100644 --- a/confluent_server/confluent/collective/manager.py +++ b/confluent_server/confluent/collective/manager.py @@ -716,6 +716,7 @@ def become_leader(connection): if reassimilate is not None: reassimilate.kill() reassimilate = eventlet.spawn(reassimilate_missing) + cfm._ready = True if _assimilate_missing(skipaddr): schedule_rebalance() From d4c535d038c244f753697617cf32f334ec0de79e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 15 Sep 2023 15:32:33 -0400 Subject: [PATCH 057/126] Halt autonomous discovery handling while configmanager is down This avoids triggering a potential large amount of churn on transiently "unknown" systems that are actually discovered. --- confluent_server/confluent/discovery/core.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/confluent_server/confluent/discovery/core.py b/confluent_server/confluent/discovery/core.py index e18aae0e..dfb50b9f 100644 --- a/confluent_server/confluent/discovery/core.py +++ b/confluent_server/confluent/discovery/core.py @@ -648,6 +648,8 @@ def detected_models(): def _recheck_nodes(nodeattribs, configmanager): + if not cfm.config_is_ready(): + return if rechecklock.locked(): # if already in progress, don't run again # it may make sense to schedule a repeat, but will try the easier and less redundant way first @@ -766,6 +768,9 @@ def eval_detected(info): def detected(info): global rechecker global rechecktime + if not cfm.config_is_ready(): + # drop processing of discovery data while configmanager is 'down' + return # later, manual and CMM discovery may act on SN and/or UUID for service in info['services']: if service in nodehandlers: From aa5de3c6a3128d11737e44028026373fafd1b45d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 15 Sep 2023 15:48:37 -0400 
Subject: [PATCH 058/126] Suspend handling of new socket connections while configmanager down --- confluent_server/confluent/sockapi.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_server/confluent/sockapi.py b/confluent_server/confluent/sockapi.py index e90176ce..2d4db15b 100644 --- a/confluent_server/confluent/sockapi.py +++ b/confluent_server/confluent/sockapi.py @@ -141,6 +141,8 @@ def sessionhdl(connection, authname, skipauth=False, cert=None): if 'collective' in response: return collective.handle_connection(connection, cert, response['collective']) + while not configmanager.config_is_ready(): + eventlet.sleep(1) if 'dispatch' in response: dreq = tlvdata.recvall(connection, response['dispatch']['length']) return pluginapi.handle_dispatch(connection, cert, dreq, From 37b75ba777090b091e632de90a375dd79920f93e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 15 Sep 2023 15:54:35 -0400 Subject: [PATCH 059/126] Correct variable name on commit clear --- confluent_server/confluent/config/configmanager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index 54888dd1..9419e7fe 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -883,7 +883,7 @@ def commit_clear(): pass ConfigManager.wait_for_sync(True) ConfigManager._bg_sync_to_file() - ready = True + _ready = True cfgleader = None From 83e3627b47571185ac75f47ddcbad4dcc62da97c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 18 Sep 2023 10:19:50 -0400 Subject: [PATCH 060/126] Add pre.d to ubuntu 22 diskful --- .../profiles/default/scripts/pre.sh | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/pre.sh b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/pre.sh index ddfe598b..2f671d38 100755 --- 
a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/pre.sh +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/pre.sh @@ -1,5 +1,16 @@ #!/bin/bash deploycfg=/custom-installation/confluent/confluent.deploycfg +mkdir -p /var/log/confluent +mkdir -p /opt/confluent/bin +mkdir -p /etc/confluent +cp /custom-installation/confluent/confluent.info /custom-installation/confluent/confluent.apikey /etc/confluent/ +cat /custom-installation/tls/*.pem >> /etc/confluent/ca.pem +cp /custom-installation/confluent/bin/apiclient /opt/confluent/bin +cp $deploycfg /etc/confluent/ +( +exec >> /var/log/confluent/confluent-pre.log +exec 2>> /var/log/confluent/confluent-pre.log +chmod 600 /var/log/confluent/confluent-pre.log cryptboot=$(grep encryptboot: $deploycfg|sed -e 's/^encryptboot: //') if [ "$cryptboot" != "" ] && [ "$cryptboot" != "none" ] && [ "$cryptboot" != "null" ]; then @@ -23,7 +34,16 @@ echo HostbasedAuthentication yes >> /etc/ssh/sshd_config.d/confluent.conf echo HostbasedUsesNameFromPacketOnly yes >> /etc/ssh/sshd_config.d/confluent.conf echo IgnoreRhosts no >> /etc/ssh/sshd_config.d/confluent.conf systemctl restart sshd +mkdir -p /etc/confluent +curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/functions > /etc/confluent/functions +. /etc/confluent/functions +run_remote_parts pre.d curl -f -X POST -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $apikey" https://$confluent_mgr/confluent-api/self/nodelist > /tmp/allnodes -curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/getinstalldisk > /custom-installation/getinstalldisk -python3 /custom-installation/getinstalldisk +if [ ! -e /tmp/installdisk ]; then + curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/getinstalldisk > /custom-installation/getinstalldisk + python3 /custom-installation/getinstalldisk +fi sed -i s!%%INSTALLDISK%%!/dev/$(cat /tmp/installdisk)! /autoinstall.yaml +) & +tail --pid $! 
-n 0 -F /var/log/confluent/confluent-pre.log > /dev/console + From a01b7c6503064606e767749b2c40030a7fce743a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 18 Sep 2023 10:30:52 -0400 Subject: [PATCH 061/126] Revamp and add missing bits to scripted ubuntu install --- .../default/ansible/firstboot.d/README.txt | 29 +++++++++++++++++++ .../default/ansible/post.d/README.txt | 29 +++++++++++++++++++ .../profiles/default/scripts/firstboot.sh | 7 ++++- .../profiles/default/scripts/post.sh | 15 ++++++---- .../profiles/default/scripts/pre.d/.gitignore | 0 5 files changed, 74 insertions(+), 6 deletions(-) create mode 100644 confluent_osdeploy/ubuntu22.04/profiles/default/ansible/firstboot.d/README.txt create mode 100644 confluent_osdeploy/ubuntu22.04/profiles/default/ansible/post.d/README.txt create mode 100644 confluent_osdeploy/ubuntu22.04/profiles/default/scripts/pre.d/.gitignore diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/ansible/firstboot.d/README.txt b/confluent_osdeploy/ubuntu22.04/profiles/default/ansible/firstboot.d/README.txt new file mode 100644 index 00000000..ad6fc712 --- /dev/null +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/ansible/firstboot.d/README.txt @@ -0,0 +1,29 @@ +Ansible playbooks ending in .yml or .yaml that are placed into this directory will be executed at the +appropriate phase of the install process. + +Alternatively, plays may be placed in /var/lib/confluent/private/os//ansible/. +This prevents public clients from being able to read the plays, which is not necessary for them to function, +and may protect them from divulging material contained in the plays or associated roles. + +The 'hosts' may be omitted, and if included will be ignored, replaced with the host that is specifically +requesting the playbooks be executed. + +Also, the playbooks will be executed on the deployment server. 
Hence it may be slower in aggregate than +running content under scripts/ which ask much less of the deployment server + +Here is an example of what a playbook would look like broadly: + +- name: Example + gather_facts: no + tasks: + - name: Example1 + lineinfile: + path: /etc/hosts + line: 1.2.3.4 test1 + create: yes + - name: Example2 + lineinfile: + path: /etc/hosts + line: 1.2.3.5 test2 + create: yes + diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/ansible/post.d/README.txt b/confluent_osdeploy/ubuntu22.04/profiles/default/ansible/post.d/README.txt new file mode 100644 index 00000000..ad6fc712 --- /dev/null +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/ansible/post.d/README.txt @@ -0,0 +1,29 @@ +Ansible playbooks ending in .yml or .yaml that are placed into this directory will be executed at the +appropriate phase of the install process. + +Alternatively, plays may be placed in /var/lib/confluent/private/os//ansible/. +This prevents public clients from being able to read the plays, which is not necessary for them to function, +and may protect them from divulging material contained in the plays or associated roles. + +The 'hosts' may be omitted, and if included will be ignored, replaced with the host that is specifically +requesting the playbooks be executed. + +Also, the playbooks will be executed on the deployment server. 
Hence it may be slower in aggregate than +running content under scripts/ which ask much less of the deployment server + +Here is an example of what a playbook would look like broadly: + +- name: Example + gather_facts: no + tasks: + - name: Example1 + lineinfile: + path: /etc/hosts + line: 1.2.3.4 test1 + create: yes + - name: Example2 + lineinfile: + path: /etc/hosts + line: 1.2.3.5 test2 + create: yes + diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/firstboot.sh b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/firstboot.sh index d14269cf..22848fe7 100755 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/firstboot.sh +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/firstboot.sh @@ -2,7 +2,10 @@ echo "Confluent first boot is running" HOME=$(getent passwd $(whoami)|cut -d: -f 6) export HOME -seems a potentially relevant thing to put i... by Jarrod Johnson +( +exec >> /target/var/log/confluent/confluent-firstboot.log +exec 2>> /target/var/log/confluent/confluent-firstboot.log +chmod 600 /target/var/log/confluent/confluent-firstboot.log cp -a /etc/confluent/ssh/* /etc/ssh/ systemctl restart sshd rootpw=$(grep ^rootpassword: /etc/confluent/confluent.deploycfg |awk '{print $2}') @@ -22,3 +25,5 @@ source /etc/confluent/functions run_remote_parts firstboot.d run_remote_config firstboot.d curl --capath /etc/confluent/tls -f -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" -X POST -d "status: complete" https://$confluent_mgr/confluent-api/self/updatestatus +) & +tail --pid $! 
-n 0 -F /target/var/log/confluent/confluent-post.log > /dev/console diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh index 7b970285..5f530262 100755 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh @@ -8,7 +8,6 @@ chmod go-rwx /etc/confluent/* for i in /custom-installation/ssh/*.ca; do echo '@cert-authority *' $(cat $i) >> /target/etc/ssh/ssh_known_hosts done - cp -a /etc/ssh/ssh_host* /target/etc/confluent/ssh/ cp -a /etc/ssh/sshd_config.d/confluent.conf /target/etc/confluent/ssh/sshd_config.d/ sshconf=/target/etc/ssh/ssh_config @@ -19,10 +18,15 @@ echo 'Host *' >> $sshconf echo ' HostbasedAuthentication yes' >> $sshconf echo ' EnableSSHKeysign yes' >> $sshconf echo ' HostbasedKeyTypes *ed25519*' >> $sshconf - +cp /etc/confluent/functions /target/etc/confluent/functions +source /etc/confluent/functions +mkdir -p /target/var/log/confluent +cp /var/log/confluent/* /target/var/log/confluent/ +( +exec >> /target/var/log/confluent/confluent-post.log +exec 2>> /target/var/log/confluent/confluent-post.log +chmod 600 /target/var/log/confluent/confluent-post.log curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/firstboot.sh > /target/etc/confluent/firstboot.sh -curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/functions > /target/etc/confluent/functions -source /target/etc/confluent/functions chmod +x /target/etc/confluent/firstboot.sh cp /tmp/allnodes /target/root/.shosts cp /tmp/allnodes /target/etc/ssh/shosts.equiv @@ -85,4 +89,5 @@ source /target/etc/confluent/functions run_remote_config post umount /target/sys /target/dev /target/proc - +) & +tail --pid $! 
-n 0 -F /target/var/log/confluent/confluent-post.log > /dev/console diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/pre.d/.gitignore b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/pre.d/.gitignore new file mode 100644 index 00000000..e69de29b From 3e8c6d1ea6ccd33f497a2b5057e512376338a12c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 18 Sep 2023 11:04:50 -0400 Subject: [PATCH 062/126] Correct syntax issue in el7 image2disk --- .../el7-diskless/profiles/default/scripts/image2disk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/el7-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/el7-diskless/profiles/default/scripts/image2disk.py index 8548cebd..768aa57d 100644 --- a/confluent_osdeploy/el7-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/el7-diskless/profiles/default/scripts/image2disk.py @@ -60,7 +60,7 @@ class PartedRunner(): def __init__(self, disk): self.disk = disk - def run(self, command, check=True):: + def run(self, command, check=True): command = command.split() command = ['parted', '-a', 'optimal', '-s', self.disk] + command if check: From 0a527f5f399408536f18cfd24f91056e1de77930 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 18 Sep 2023 11:38:41 -0400 Subject: [PATCH 063/126] Add environment to firstboot ubuntu --- .../ubuntu22.04/profiles/default/scripts/firstboot.sh | 3 ++- .../ubuntu22.04/profiles/default/scripts/post.sh | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/firstboot.sh b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/firstboot.sh index 22848fe7..c0ba44ab 100755 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/firstboot.sh +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/firstboot.sh @@ -21,7 +21,8 @@ done hostnamectl set-hostname $(grep ^NODENAME: /etc/confluent/confluent.info | awk '{print $2}') 
touch /etc/cloud/cloud-init.disabled source /etc/confluent/functions - +confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') +export confluent_mgr confluent_profile run_remote_parts firstboot.d run_remote_config firstboot.d curl --capath /etc/confluent/tls -f -H "CONFLUENT_NODENAME: $nodename" -H "CONFLUENT_APIKEY: $confluent_apikey" -X POST -d "status: complete" https://$confluent_mgr/confluent-api/self/updatestatus diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh index 5f530262..773bf8ad 100755 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/post.sh @@ -87,6 +87,7 @@ chroot /target bash -c "source /etc/confluent/functions; run_remote_parts post.d source /target/etc/confluent/functions run_remote_config post +python3 /opt/confluent/bin/apiclient /confluent-api/self/updatestatus -d 'status: staged' umount /target/sys /target/dev /target/proc ) & From 8f80add0f1645194c2b72b5f1cee5c660f196f79 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 18 Sep 2023 15:19:10 -0400 Subject: [PATCH 064/126] Enhance debian packaging for confluent --- confluent_server/builddeb | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/confluent_server/builddeb b/confluent_server/builddeb index fe2bdf96..eb17c25f 100755 --- a/confluent_server/builddeb +++ b/confluent_server/builddeb @@ -36,7 +36,7 @@ if [ "$OPKGNAME" = "confluent-server" ]; then if grep wheezy /etc/os-release; then sed -i 's/^\(Depends:.*\)/\1, python-confluent-client, python-lxml, python-eficompressor, python-pycryptodomex, python-dateutil, python-pyopenssl, python-msgpack/' debian/control else - sed -i 's/^\(Depends:.*\)/\1, confluent-client, python3-lxml, python3-eficompressor, python3-pycryptodome, python3-websocket, python3-msgpack, python3-eventlet, python3-pyparsing, 
python3-pyghmi, python3-paramiko/' debian/control + sed -i 's/^\(Depends:.*\)/\1, confluent-client, python3-lxml, python3-eficompressor, python3-pycryptodome, python3-websocket, python3-msgpack, python3-eventlet, python3-pyparsing, python3-pyghmi, python3-paramiko, python3-pysnmp4, python3-libarchive-c, confluent-vtbufferd/' debian/control fi if grep wheezy /etc/os-release; then echo 'confluent_client python-confluent-client' >> debian/pydist-overrides @@ -49,6 +49,13 @@ if ! grep wheezy /etc/os-release; then fi head -n -1 debian/control > debian/control1 mv debian/control1 debian/control +cat > debian/postinst << EOF +if ! getent passwd confluent > /dev/null; then + useradd -r affluent -d /var/lib/affluent -s /bin/nologin + mkdir /etc/confluent + chown confluent /etc/confluent +fi +EOF echo 'export PYBUILD_INSTALL_ARGS=--install-lib=/opt/confluent/lib/python' >> debian/rules #echo 'Provides: python-'$DPKGNAME >> debian/control #echo 'Conflicts: python-'$DPKGNAME >> debian/control From 47fc233cce8240a20d8ae5eb43914c0d827c91e0 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 18 Sep 2023 15:48:38 -0400 Subject: [PATCH 065/126] Fix debian packaging for confluent --- confluent_server/builddeb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_server/builddeb b/confluent_server/builddeb index eb17c25f..f71bfce4 100755 --- a/confluent_server/builddeb +++ b/confluent_server/builddeb @@ -51,8 +51,8 @@ head -n -1 debian/control > debian/control1 mv debian/control1 debian/control cat > debian/postinst << EOF if ! 
getent passwd confluent > /dev/null; then - useradd -r affluent -d /var/lib/affluent -s /bin/nologin - mkdir /etc/confluent + useradd -r confluent -d /var/lib/confluent -s /usr/sbin/nologin + mkdir -p /etc/confluent chown confluent /etc/confluent fi EOF From d613d0f546641ef722c06a146b9739a39035332a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 18 Sep 2023 16:03:48 -0400 Subject: [PATCH 066/126] Add openbmc plugin for console --- .../confluent/config/attributes.py | 2 +- .../confluent/plugins/console/openbmc.py | 160 ++++++++++++++++++ 2 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 confluent_server/confluent/plugins/console/openbmc.py diff --git a/confluent_server/confluent/config/attributes.py b/confluent_server/confluent/config/attributes.py index 50f5492b..101ee03d 100644 --- a/confluent_server/confluent/config/attributes.py +++ b/confluent_server/confluent/config/attributes.py @@ -371,7 +371,7 @@ node = { 'the managed node. If not specified, then console ' 'is disabled. "ipmi" should be specified for most ' 'systems if console is desired.'), - 'validvalues': ('ssh', 'ipmi', 'tsmsol'), + 'validvalues': ('ssh', 'ipmi', 'openbmc', 'tsmsol'), }, # 'virtualization.host': { # 'description': ('Hypervisor where this node does/should reside'), diff --git a/confluent_server/confluent/plugins/console/openbmc.py b/confluent_server/confluent/plugins/console/openbmc.py new file mode 100644 index 00000000..17acae7c --- /dev/null +++ b/confluent_server/confluent/plugins/console/openbmc.py @@ -0,0 +1,160 @@ +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2015-2019 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# This plugin provides an ssh implementation conforming to the 'console' +# specification. consoleserver or shellserver would be equally likely +# to use this. + +import confluent.exceptions as cexc +import confluent.interface.console as conapi +import confluent.log as log +import confluent.util as util +import pyghmi.exceptions as pygexc +import pyghmi.redfish.command as rcmd +import pyghmi.util.webclient as webclient +import eventlet +import eventlet.green.ssl as ssl +try: + websocket = eventlet.import_patched('websocket') + wso = websocket.WebSocket +except Exception: + wso = object + +def get_conn_params(node, configdata): + if 'secret.hardwaremanagementuser' in configdata: + username = configdata['secret.hardwaremanagementuser']['value'] + else: + username = 'USERID' + if 'secret.hardwaremanagementpassword' in configdata: + passphrase = configdata['secret.hardwaremanagementpassword']['value'] + else: + passphrase = 'PASSW0RD' # for lack of a better guess + if 'hardwaremanagement.manager' in configdata: + bmc = configdata['hardwaremanagement.manager']['value'] + else: + bmc = node + bmc = bmc.split('/', 1)[0] + return { + 'username': username, + 'passphrase': passphrase, + 'bmc': bmc, + } +_configattributes = ('secret.hardwaremanagementuser', + 'secret.hardwaremanagementpassword', + 'hardwaremanagement.manager') + +class WrappedWebSocket(wso): + + def set_verify_callback(self, callback): + self._certverify = callback + + def connect(self, url, **options): + add_tls = url.startswith('wss://') + if add_tls: + hostname, port, resource, _ =
websocket._url.parse_url(url) + if hostname[0] != '[' and ':' in hostname: + hostname = '[{0}]'.format(hostname) + if resource[0] != '/': + resource = '/{0}'.format(resource) + url = 'ws://{0}:443{1}'.format(hostname,resource) + else: + return super(WrappedWebSocket, self).connect(url, **options) + self.sock_opt.timeout = options.get('timeout', self.sock_opt.timeout) + self.sock, addrs = websocket._http.connect(url, self.sock_opt, websocket._http.proxy_info(**options), + options.pop('socket', None)) + self.sock = ssl.wrap_socket(self.sock, cert_reqs=ssl.CERT_NONE) + # The above is superseded by the _certverify, which provides + # known-hosts style cert validation + bincert = self.sock.getpeercert(binary_form=True) + if not self._certverify(bincert): + raise pygexc.UnrecognizedCertificate('Unknown certificate', bincert) + try: + self.handshake_response = websocket._handshake.handshake(self.sock, *addrs, **options) + if self.handshake_response.status in websocket._handshake.SUPPORTED_REDIRECT_STATUSES: + options['redirect_limit'] = options.pop('redirect_limit', 3) - 1 + if options['redirect_limit'] < 0: + raise Exception('Redirect limit hit') + url = self.handshake_response.headers['location'] + self.sock.close() + return self.connect(url, **options) + self.connected = True + except: + if self.sock: + self.sock.close() + self.sock = None + raise + + + + + + +class TsmConsole(conapi.Console): + + def __init__(self, node, config): + self.node = node + self.ws = None + configdata = config.get_node_attributes([node], _configattributes, decrypt=True) + connparams = get_conn_params(node, configdata[node]) + self.username = connparams['username'] + self.password = connparams['passphrase'] + self.bmc = connparams['bmc'] + self.origbmc = connparams['bmc'] + if ':' in self.bmc: + self.bmc = '[{0}]'.format(self.bmc) + self.datacallback = None + self.nodeconfig = config + self.connected = False + + + def recvdata(self): + while self.connected: + pendingdata = self.ws.recv() +
if pendingdata == '': + self.datacallback(conapi.ConsoleEvent.Disconnect) + return + self.datacallback(pendingdata) + + def connect(self, callback): + self.datacallback = callback + kv = util.TLSCertVerifier( + self.nodeconfig, self.node, 'pubkeys.tls_hardwaremanager').verify_cert + wc = webclient.SecureHTTPConnection(self.origbmc, 443, verifycallback=kv) + rsp = wc.grab_json_response_with_status('/login', {'data': [self.username.decode('utf8'), self.password.decode("utf8")]}, headers={'Content-Type': 'application/json'}) + bmc = self.bmc + if '%' in self.bmc: + prefix = self.bmc.split('%')[0] + bmc = prefix + ']' + self.ws = WrappedWebSocket(host=bmc) + self.ws.set_verify_callback(kv) + self.ws.connect('wss://{0}/console0'.format(self.bmc), host=bmc, cookie='XSRF-TOKEN={0}; SESSION={1}'.format(wc.cookies['XSRF-TOKEN'], wc.cookies['SESSION'])) + self.connected = True + eventlet.spawn_n(self.recvdata) + return + + def write(self, data): + self.ws.send(data) + + def close(self): + if self.ws: + self.ws.close() + self.connected = False + self.datacallback = None + +def create(nodes, element, configmanager, inputdata): + if len(nodes) == 1: + return TsmConsole(nodes[0], configmanager) From 04505e1bbb53bc1d3bb1335bb14557be17946179 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 19 Sep 2023 13:01:10 -0400 Subject: [PATCH 067/126] Break out partitioning and packagelist for EL profiles --- confluent_osdeploy/el8/profiles/default/kickstart | 10 +--------- confluent_osdeploy/el8/profiles/default/scripts/pre.sh | 10 +++++++--- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/confluent_osdeploy/el8/profiles/default/kickstart b/confluent_osdeploy/el8/profiles/default/kickstart index fe626e93..95d4fe78 100644 --- a/confluent_osdeploy/el8/profiles/default/kickstart +++ b/confluent_osdeploy/el8/profiles/default/kickstart @@ -33,15 +33,7 @@ reboot %packages -@^minimal-environment -#-kernel-uek # This can opt out of the UEK for the relevant distribution
-bind-utils -chrony -pciutils -python3 -rsync -tar --iwl*-firmware +%include /tmp/pkglist %include /tmp/addonpackages %include /tmp/cryptpkglist %end diff --git a/confluent_osdeploy/el8/profiles/default/scripts/pre.sh b/confluent_osdeploy/el8/profiles/default/scripts/pre.sh index e00ea19a..4d76aaa3 100644 --- a/confluent_osdeploy/el8/profiles/default/scripts/pre.sh +++ b/confluent_osdeploy/el8/profiles/default/scripts/pre.sh @@ -87,6 +87,7 @@ done cryptboot=$(grep ^encryptboot: /etc/confluent/confluent.deploycfg | awk '{print $2}') LUKSPARTY='' touch /tmp/cryptpkglist +touch /tmp/pkglist touch /tmp/addonpackages if [ "$cryptboot" == "tpm2" ]; then LUKSPARTY="--encrypted --passphrase=$(cat /etc/confluent/confluent.apikey)" @@ -102,15 +103,18 @@ confluentpython /opt/confluent/bin/apiclient /confluent-public/os/$confluent_pro run_remote pre.custom run_remote_parts pre.d confluentpython /etc/confluent/apiclient /confluent-public/os/$confluent_profile/kickstart -o /tmp/kickstart.base +if grep '^%include /tmp/pkglist' /tmp/kickstart.* > /dev/null; then + confluentpython /etc/confluent/apiclient /confluent-public/os/$confluent_profile/packagelist -o /tmp/pkglist +fi grep '^%include /tmp/partitioning' /tmp/kickstart.* > /dev/null || touch /tmp/installdisk if [ ! -e /tmp/installdisk ]; then run_remote_python getinstalldisk fi +confluentpython /etc/confluent/apiclient /confluent-public/os/$confluent_profile/partitioning -o /tmp/partitioning.template grep '^%include /tmp/partitioning' /tmp/kickstart.* > /dev/null || rm /tmp/installdisk if [ -e /tmp/installdisk -a ! 
-e /tmp/partitioning ]; then - echo clearpart --all --initlabel >> /tmp/partitioning - echo ignoredisk --only-use $(cat /tmp/installdisk) >> /tmp/partitioning - echo autopart --nohome $LUKSPARTY >> /tmp/partitioning + INSTALLDISK=$(cat /tmp/installdisk) + sed -e s/%%INSTALLDISK%%/$INSTALLDISK/ -e s/%%LUKSHOOK%%/$LUKSPARTY/ /tmp/partitioning.template > /tmp/partitioning dd if=/dev/zero of=/dev/$(cat /tmp/installdisk) bs=1M count=1 >& /dev/null vgchange -a n >& /dev/null fi From d0c97b762365b2405761d28b340acc8d35f91b7e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 19 Sep 2023 13:14:07 -0400 Subject: [PATCH 068/126] Add pieces of EL profiles --- confluent_osdeploy/el8/profiles/default/packagelist | 9 +++++++++ confluent_osdeploy/el8/profiles/default/partitioning | 4 ++++ 2 files changed, 13 insertions(+) create mode 100644 confluent_osdeploy/el8/profiles/default/packagelist create mode 100644 confluent_osdeploy/el8/profiles/default/partitioning diff --git a/confluent_osdeploy/el8/profiles/default/packagelist b/confluent_osdeploy/el8/profiles/default/packagelist new file mode 100644 index 00000000..4e3b9681 --- /dev/null +++ b/confluent_osdeploy/el8/profiles/default/packagelist @@ -0,0 +1,9 @@ +@^minimal-environment +#-kernel-uek # This can opt out of the UEK for the relevant distribution +bind-utils +chrony +pciutils +python3 +rsync +tar +-iwl*-firmware diff --git a/confluent_osdeploy/el8/profiles/default/partitioning b/confluent_osdeploy/el8/profiles/default/partitioning new file mode 100644 index 00000000..c11b135b --- /dev/null +++ b/confluent_osdeploy/el8/profiles/default/partitioning @@ -0,0 +1,4 @@ +clearpart --all --initlabel +ignoredisk --only-use %%INSTALLDISK%% +autopart --nohome %%LUKSHOOK%% + From f88b44dee2bfbcfb8d08d1d8d7c87e67c0c7b680 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 20 Sep 2023 10:13:34 -0400 Subject: [PATCH 069/126] Fix issues with imgutil --- imgutil/imgutil | 12 +++++++----- 1 file changed, 7 insertions(+), 5 
deletions(-) diff --git a/imgutil/imgutil b/imgutil/imgutil index 5596ef6a..de3a9025 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -613,7 +613,7 @@ class DebHandler(OsHandler): needpkgs = [] if not os.path.exists(os.path.join(hostpath, 'usr/bin/tpm2_getcap')): needpkgs.append('tpm2-tools') - lfuses = glob.glob(os.path.join(hostpath, '/lib/*/libfuse.so.2') + lfuses = glob.glob(os.path.join(hostpath, '/lib/*/libfuse.so.2')) if not lfuses: needpkgs.append('libfuse2') if needpkgs: @@ -646,14 +646,16 @@ class DebHandler(OsHandler): class ElHandler(OsHandler): - def __init__(self, name, version, arch, args): + def __init__(self, name, version, arch, args, hostpath='/'): self.oscategory = 'el{0}'.format(version.split('.')[0]) self.yumargs = [] super().__init__(name, version, arch, args) needpkgs = [] + if not hostpath: + return if not os.path.exists(os.path.join(hostpath, 'usr/bin/tpm2_getcap')): needpkgs.append('tpm2-tools') - lfuses = glob.glob(os.path.join(hostpath, '/usr/lib64/libfuse.so.2') + lfuses = glob.glob(os.path.join(hostpath, '/usr/lib64/libfuse.so.2')) if not lfuses: needpkgs.append('fuse-libs') if not os.path.exists(os.path.join(hostpath, '/usr/bin/ipcalc')): @@ -1040,7 +1042,7 @@ def fingerprint_source_el(files, sourcepath, args): if arch == 'noarch': prodinfo = open(os.path.join(sourcepath, '.discinfo')).read() arch = prodinfo.split('\n')[2] - return ElHandler(osname, ver, arch, args) + return ElHandler(osname, ver, arch, args, None) return None @@ -1092,7 +1094,7 @@ def fingerprint_host_el(args, hostpath='/'): osname = osname.replace('-release', '').replace('-', '_') if osname == 'centos_linux': osname = 'centos' - return ElHandler(osname, version, os.uname().machine, args) + return ElHandler(osname, version, os.uname().machine, args, hostpath) def fingerprint_host_deb(args, hostpath='/'): From a00fd325aa4a5b0d9a9a55b4b9b01e8426da045d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 27 Sep 2023 13:09:23 -0400 Subject: [PATCH 070/126] 
Export variables for ubuntu pre.d run --- confluent_osdeploy/ubuntu22.04/profiles/default/scripts/pre.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/pre.sh b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/pre.sh index 2f671d38..4ff1878e 100755 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/pre.sh +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/pre.sh @@ -35,6 +35,7 @@ echo HostbasedUsesNameFromPacketOnly yes >> /etc/ssh/sshd_config.d/confluent.con echo IgnoreRhosts no >> /etc/ssh/sshd_config.d/confluent.conf systemctl restart sshd mkdir -p /etc/confluent +export confluent_profile confluent_mgr curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/functions > /etc/confluent/functions . /etc/confluent/functions run_remote_parts pre.d From 79e3ad53f880a92041da6dab5d13b27c0d58c60b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 29 Sep 2023 16:23:59 -0400 Subject: [PATCH 071/126] Add server side rack layout organization The info is hard to put together client side, but supremely easy server side. Provide a nice call to get the layout for a noderange, similar to (but better than) current GUI code. Now GUI can get a nice canned JSON description of the layout. 
--- confluent_server/confluent/core.py | 1 + confluent_server/confluent/messages.py | 16 +++ .../confluent/plugins/info/layout.py | 100 ++++++++++++++++++ 3 files changed, 117 insertions(+) create mode 100644 confluent_server/confluent/plugins/info/layout.py diff --git a/confluent_server/confluent/core.py b/confluent_server/confluent/core.py index a9ee1dba..f70bc6ae 100644 --- a/confluent_server/confluent/core.py +++ b/confluent_server/confluent/core.py @@ -446,6 +446,7 @@ def _init_core(): }, }, }, + 'layout': PluginRoute({'handler': 'layout'}), 'media': { 'uploads': PluginCollection({ 'pluginattrs': ['hardwaremanagement.method'], diff --git a/confluent_server/confluent/messages.py b/confluent_server/confluent/messages.py index a24a4d78..ce36344d 100644 --- a/confluent_server/confluent/messages.py +++ b/confluent_server/confluent/messages.py @@ -92,6 +92,7 @@ def msg_deserialize(packed): return cls(*m[1:]) raise Exception("Unknown shenanigans") + class ConfluentMessage(object): apicode = 200 readonly = False @@ -254,6 +255,21 @@ class ConfluentNodeError(object): raise Exception('{0}: {1}'.format(self.node, self.error)) +class Generic(ConfluentMessage): + + def __init__(self, data): + self.data = data + + def json(self): + return json.dumps(self.data) + + def raw(self): + return self.data + + def html(self): + return json.dumps(self.data) + + class ConfluentResourceUnavailable(ConfluentNodeError): apicode = 503 diff --git a/confluent_server/confluent/plugins/info/layout.py b/confluent_server/confluent/plugins/info/layout.py new file mode 100644 index 00000000..8397af7f --- /dev/null +++ b/confluent_server/confluent/plugins/info/layout.py @@ -0,0 +1,100 @@ +# Copyright 2023 Lenovo +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import confluent.core as core +import confluent.messages as msg + +def retrieve(nodes, element, configmanager, inputdata): + locationinfo = configmanager.get_node_attributes(nodes, + (u'enclosure.manager', u'enclosure.bay', u'location.rack', + u'location.row', u'location.u', u'location.height')) + enclosuremap = {} + rackmap = {} + allnodedata = {} + needenclosures = set([]) + locatednodes = set([]) + for node in locationinfo: + nodeinfo = locationinfo[node] + rack = nodeinfo.get(u'location.rack', {}).get('value', '') + u = nodeinfo.get(u'location.u', {}).get('value', None) + row = nodeinfo.get(u'location.row', {}).get('value', '') + enclosure = nodeinfo.get(u'enclosure.manager', {}).get('value', None) + bay = nodeinfo.get(u'enclosure.bay', {}).get('value', None) + height = nodeinfo.get(u'location.height', {}).get('value', None) + if enclosure: + if enclosure not in enclosuremap: + enclosuremap[enclosure] = {} + enclosuremap[enclosure][bay] = node + if u: + if row not in rackmap: + rackmap[row] = {} + if rack not in rackmap[row]: + rackmap[row][rack] = {} + rackmap[row][rack][u] = {'node': enclosure, 'children': enclosuremap[enclosure]} + allnodedata[enclosure] = rackmap[row][rack][u] + if height: + allnodedata[enclosure]['height'] = height + else: # need to see if enclosure lands in the map naturally or need to pull it + needenclosures.add(enclosure) + elif u: + if row not in rackmap: + rackmap[row] = {} + if rack not in rackmap[row]: + rackmap[row][rack] = {} + rackmap[row][rack][u] = {'node': node} + allnodedata[node] = rackmap[row][rack][u] + if 
height: + allnodedata[node]['height'] = height + locatednodes.add(node) + cfgenc = needenclosures - locatednodes + locationinfo = configmanager.get_node_attributes(cfgenc, (u'location.rack', u'location.row', u'location.u', u'location.height')) + for enclosure in locationinfo: + nodeinfo = locationinfo[enclosure] + rack = nodeinfo.get(u'location.rack', {}).get('value', '') + u = nodeinfo.get(u'location.u', {}).get('value', None) + row = nodeinfo.get(u'location.row', {}).get('value', '') + height = nodeinfo.get(u'location.height', {}).get('value', None) + if u: + allnodedata[enclosure] = {'node': enclosure, 'children': enclosuremap[enclosure]} + if height: + allnodedata[enclosure]['height'] = height + if row not in rackmap: + rackmap[row] = {} + if rack not in rackmap[row]: + rackmap[row][rack] = {} + rackmap[row][rack][u] = allnodedata[enclosure] + results = { + 'errors': [], + 'locations': rackmap, + } + for enclosure in enclosuremap: + if enclosure not in allnodedata: + results['errors'].append('Enclosure {} is missing required location information'.format(enclosure)) + else: + allnodedata[enclosure]['children'] = enclosuremap[enclosure] + needheight = set([]) + for node in allnodedata: + if 'height' not in allnodedata[node]: + needheight.add(node) + needheight = ','.join(needheight) + if needheight: + for rsp in core.handle_path( + '/noderange/{0}/description'.format(needheight), + 'retrieve', configmanager, + inputdata=None): + kvp = rsp.kvpairs + for node in kvp: + allnodedata[node]['height'] = kvp[node]['height'] + yield msg.Generic(results) + From ef9083062bbb75e4e96c6206f921ce63b7247307 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 3 Oct 2023 10:13:53 -0400 Subject: [PATCH 072/126] Make multiple attempts to fetch networking configuration Since confignet runs early in startup, the networking can be a bit fickle. Tolerate outages during early use. 
--- .../common/profile/scripts/confignet | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index dec1808d..f2a2edff 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -435,10 +435,26 @@ if __name__ == '__main__': curridx = addr[-1] if curridx in doneidxs: continue - status, nc = apiclient.HTTPSClient(usejson=True, host=srv).grab_url_with_status('/confluent-api/self/netcfg') + for tries in (1, 2 3): + try: + status, nc = apiclient.HTTPSClient(usejson=True, host=srv).grab_url_with_status('/confluent-api/self/netcfg') + break + except Exception: + if tries == 3: + raise + time.sleep(1) + continue nc = json.loads(nc) if not dc: - status, dc = apiclient.HTTPSClient(usejson=True, host=srv).grab_url_with_status('/confluent-api/self/deploycfg2') + for tries in (1, 2 3): + try: + status, dc = apiclient.HTTPSClient(usejson=True, host=srv).grab_url_with_status('/confluent-api/self/deploycfg2') + break + except Exception: + if tries == 3: + raise + time.sleep(1) + continue dc = json.loads(dc) iname = get_interface_name(idxmap[curridx], nc.get('default', {})) if iname: From ee19386d8c1cb4e4a33b7ec79b2d7dc0fe632976 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 4 Oct 2023 09:49:09 -0400 Subject: [PATCH 073/126] Export nodename in ubuntu pre --- confluent_osdeploy/ubuntu22.04/profiles/default/scripts/pre.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/pre.sh b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/pre.sh index 4ff1878e..5db222a7 100755 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/pre.sh +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/pre.sh @@ -35,7 +35,7 @@ echo HostbasedUsesNameFromPacketOnly yes >> 
/etc/ssh/sshd_config.d/confluent.con echo IgnoreRhosts no >> /etc/ssh/sshd_config.d/confluent.conf systemctl restart sshd mkdir -p /etc/confluent -export confluent_profile confluent_mgr +export nodename confluent_profile confluent_mgr curl -f https://$confluent_mgr/confluent-public/os/$confluent_profile/scripts/functions > /etc/confluent/functions . /etc/confluent/functions run_remote_parts pre.d From 9f168aee7302412f91aaa297000645b4f02162fe Mon Sep 17 00:00:00 2001 From: tkucherera Date: Wed, 4 Oct 2023 10:28:16 -0400 Subject: [PATCH 074/126] docs- batch file syntax --- confluent_client/doc/man/nodeattrib.ronn.tmpl | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/confluent_client/doc/man/nodeattrib.ronn.tmpl b/confluent_client/doc/man/nodeattrib.ronn.tmpl index b1330198..927ce5ae 100644 --- a/confluent_client/doc/man/nodeattrib.ronn.tmpl +++ b/confluent_client/doc/man/nodeattrib.ronn.tmpl @@ -61,7 +61,9 @@ to a blank value will allow masking a group defined attribute with an empty valu or environment variables. * `-s`, `--set`: - Set attributes using a batch file + Set attributes using a batch file rather than the command line. The attributes in the batch file + can be one line of key=value pairs or each attribute can be in its own line. Lines that start with + # sign will be read as commend. See EXAMPLES for batch file syntax. 
* `-m MAXNODES`, `--maxnodes=MAXNODES`: Prompt if trying to set attributes on more than @@ -120,6 +122,25 @@ to a blank value will allow masking a group defined attribute with an empty valu `d1: net.pxe.switch: pxeswitch1` `d1: net.switch:` +* Setting Attributes using a batch file with syntax similar to command line: + `# cat nodeattributes.batch` + `# power` + `power.psu1.outlet=3 power.psu1.pdu=pdu2` + `# nodeattrib n41 -s nodeattributes.batch` + `n41: 3` + `n41: pdu2` + +* Setting Attributes using a batch file with syntax where each attribute is in its own line: + `# cat nodeattributes.batch` + `# management` + `custom.mgt.switch=switch_main` + `custom.mgt.switch.port=swp4` + `# nodeattrib n41 -s nodeattributes.batch` + `n41: switch_main` + `n41: swp4` + + + ## SEE ALSO nodegroupattrib(8), nodeattribexpressions(5) From d299db3442f3341f348c4e560397c7a493f9eaf7 Mon Sep 17 00:00:00 2001 From: tkucherera Date: Wed, 4 Oct 2023 10:31:24 -0400 Subject: [PATCH 075/126] doc --- confluent_client/doc/man/nodeattrib.ronn.tmpl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_client/doc/man/nodeattrib.ronn.tmpl b/confluent_client/doc/man/nodeattrib.ronn.tmpl index 927ce5ae..28e37a5c 100644 --- a/confluent_client/doc/man/nodeattrib.ronn.tmpl +++ b/confluent_client/doc/man/nodeattrib.ronn.tmpl @@ -122,7 +122,7 @@ to a blank value will allow masking a group defined attribute with an empty valu `d1: net.pxe.switch: pxeswitch1` `d1: net.switch:` -* Setting Attributes using a batch file with syntax similar to command line: +* Setting attributes using a batch file with syntax similar to command line: `# cat nodeattributes.batch` `# power` `power.psu1.outlet=3 power.psu1.pdu=pdu2` @@ -130,7 +130,7 @@ to a blank value will allow masking a group defined attribute with an empty valu `n41: 3` `n41: pdu2` -* Setting Attributes using a batch file with syntax where each attribute is in its own line: +* Setting attributes using a batch file with syntax where 
each attribute is in its own line: `# cat nodeattributes.batch` `# management` `custom.mgt.switch=switch_main` From c8094276d0f9dc5129c740b32285adf66746cd11 Mon Sep 17 00:00:00 2001 From: tkucherera Date: Wed, 4 Oct 2023 10:34:07 -0400 Subject: [PATCH 076/126] typ0_fix --- confluent_client/doc/man/nodeattrib.ronn.tmpl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/confluent_client/doc/man/nodeattrib.ronn.tmpl b/confluent_client/doc/man/nodeattrib.ronn.tmpl index 28e37a5c..cacfc80f 100644 --- a/confluent_client/doc/man/nodeattrib.ronn.tmpl +++ b/confluent_client/doc/man/nodeattrib.ronn.tmpl @@ -62,8 +62,9 @@ to a blank value will allow masking a group defined attribute with an empty valu * `-s`, `--set`: Set attributes using a batch file rather than the command line. The attributes in the batch file - can be one line of key=value pairs or each attribute can be in its own line. Lines that start with - # sign will be read as commend. See EXAMPLES for batch file syntax. + can be specified as one line of key=value pairs line command line or each attribute can be in + its own line. Lines that start with # sign will be read as a comment. See EXAMPLES for batch + file syntax. 
* `-m MAXNODES`, `--maxnodes=MAXNODES`: Prompt if trying to set attributes on more than From ba90609f3b4893f258ac8f66b83edcd166dccaba Mon Sep 17 00:00:00 2001 From: tkucherera Date: Wed, 4 Oct 2023 10:36:58 -0400 Subject: [PATCH 077/126] documentation for nodeattrib -s --- confluent_client/doc/man/nodeattrib.ronn.tmpl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_client/doc/man/nodeattrib.ronn.tmpl b/confluent_client/doc/man/nodeattrib.ronn.tmpl index cacfc80f..3d66a65f 100644 --- a/confluent_client/doc/man/nodeattrib.ronn.tmpl +++ b/confluent_client/doc/man/nodeattrib.ronn.tmpl @@ -62,8 +62,8 @@ to a blank value will allow masking a group defined attribute with an empty valu * `-s`, `--set`: Set attributes using a batch file rather than the command line. The attributes in the batch file - can be specified as one line of key=value pairs line command line or each attribute can be in - its own line. Lines that start with # sign will be read as a comment. See EXAMPLES for batch + can be specified as one line of key=value pairs simmilar to command line or each attribute can + be in its own line. Lines that start with # sign will be read as a comment. See EXAMPLES for batch file syntax. * `-m MAXNODES`, `--maxnodes=MAXNODES`: From eca1854d563e718c0bc4030d7b0076c22d12bcf1 Mon Sep 17 00:00:00 2001 From: tkucherera Date: Wed, 4 Oct 2023 10:37:41 -0400 Subject: [PATCH 078/126] fix to env doc --- confluent_client/doc/man/nodeattrib.ronn.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_client/doc/man/nodeattrib.ronn.tmpl b/confluent_client/doc/man/nodeattrib.ronn.tmpl index 3d66a65f..c71e59e7 100644 --- a/confluent_client/doc/man/nodeattrib.ronn.tmpl +++ b/confluent_client/doc/man/nodeattrib.ronn.tmpl @@ -54,7 +54,7 @@ to a blank value will allow masking a group defined attribute with an empty valu * `-e`, `--environment`: Set specified attributes based on exported environment variable of matching name. 
Environment variable names may be lower case or all upper case. - Replace . with _ as needed (e.g. info.note may be specified as either $info_note or $INFO_NOTE + Replace . with _ as needed (e.g. info.note may be specified as either $info_note or $INFO_NOTE) * `-p`, `--prompt`: Request interactive prompting to provide values rather than the command line From 67f607a8f16bb534ecc1a001611b112cb55c7cf7 Mon Sep 17 00:00:00 2001 From: tkucherera Date: Wed, 4 Oct 2023 16:26:20 -0400 Subject: [PATCH 079/126] fix to synopsis --- confluent_client/doc/man/nodeattrib.ronn.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_client/doc/man/nodeattrib.ronn.tmpl b/confluent_client/doc/man/nodeattrib.ronn.tmpl index c71e59e7..c0316a7d 100644 --- a/confluent_client/doc/man/nodeattrib.ronn.tmpl +++ b/confluent_client/doc/man/nodeattrib.ronn.tmpl @@ -8,7 +8,7 @@ nodeattrib(8) -- List or change confluent nodes attributes `nodeattrib -c ...` `nodeattrib -e ...` `nodeattrib -p ...` -`nodeattrib -s ` +`nodeattrib -s ...` ## DESCRIPTION From 2e84c73baaa05cfd3840d3f18afc78926bb55bfd Mon Sep 17 00:00:00 2001 From: tkucherera Date: Wed, 4 Oct 2023 16:27:05 -0400 Subject: [PATCH 080/126] '' --- confluent_client/doc/man/nodeattrib.ronn.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_client/doc/man/nodeattrib.ronn.tmpl b/confluent_client/doc/man/nodeattrib.ronn.tmpl index c0316a7d..c8127ad8 100644 --- a/confluent_client/doc/man/nodeattrib.ronn.tmpl +++ b/confluent_client/doc/man/nodeattrib.ronn.tmpl @@ -7,7 +7,7 @@ nodeattrib(8) -- List or change confluent nodes attributes `nodeattrib [ ...]` `nodeattrib -c ...` `nodeattrib -e ...` -`nodeattrib -p ...` +`nodeattrib -p ...` `nodeattrib -s ...` ## DESCRIPTION From 77eec1a791ee37ba15e4e2316dc9ed1369647c44 Mon Sep 17 00:00:00 2001 From: tkucherera Date: Thu, 5 Oct 2023 11:44:04 -0400 Subject: [PATCH 081/126] missing_shlex import in nodeattrib --- confluent_client/bin/nodeattrib | 1 + 1 
file changed, 1 insertion(+) diff --git a/confluent_client/bin/nodeattrib b/confluent_client/bin/nodeattrib index 8c6f078d..f4b0331f 100755 --- a/confluent_client/bin/nodeattrib +++ b/confluent_client/bin/nodeattrib @@ -22,6 +22,7 @@ import optparse import os import signal import sys +import shlex try: signal.signal(signal.SIGPIPE, signal.SIG_DFL) From a4ea5e5c4b2dda518af813acdd3de61f1fdb5148 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Sat, 7 Oct 2023 09:51:32 -0400 Subject: [PATCH 082/126] Abbreviate sequential nodes When we have sequential nodes, collapse to ':' delimited range. --- confluent_server/confluent/noderange.py | 112 +++++++++++++++++++++++- 1 file changed, 110 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/noderange.py b/confluent_server/confluent/noderange.py index b59bf7c6..5021d592 100644 --- a/confluent_server/confluent/noderange.py +++ b/confluent_server/confluent/noderange.py @@ -55,6 +55,92 @@ def humanify_nodename(nodename): return [int(text) if text.isdigit() else text.lower() for text in re.split(numregex, nodename)] +def unnumber_nodename(nodename): + # stub out numbers + chunked = ["{}" if text.isdigit() else text.lower() + for text in re.split(numregex, nodename)] + return chunked + +def getnumbers_nodename(nodename): + return [int(x) for x in re.split(numregex, nodename) if x.isdigit()] + +def group_elements(elems): + """ Take the specefied elements and chunk them according to text similarity + """ + prev = None + currchunk = [] + chunked_elems = [currchunk] + for elem in elems: + elemtxt = unnumber_nodename(elem) + if not prev: + prev = elemtxt + currchunk.append(elem) + continue + if prev == elemtxt: + currchunk.append(elem) + else: + currchunk = [elem] + chunked_elems.append(currchunk) + prev = elemtxt + return chunked_elems + +def abbreviate_chunk(chunk, validset): + if len(chunk) < 3: + return sorted(chunk, key=humanify_nodename) + #chunk = sorted(chunk, key=humanify_nodename) + vset = set(validset) + 
cset = set(chunk) + mins = None + maxs = None + for name in chunk: + currns = getnumbers_nodename(name) + if mins is None: + mins = list(currns) + maxs = list(currns) + continue + for n in range(len(currns)): + if currns[n] < mins[n]: + mins[n] = currns[n] + if currns[n] > maxs[n]: + maxs[n] = currns[n] + tmplt = ''.join(unnumber_nodename(chunk[0])) + bgnr = tmplt.format(*mins) + endr = tmplt.format(*maxs) + nr = '{}:{}'.format(bgnr, endr) + prospect = NodeRange(nr).nodes + ranges = [] + discontinuities = (prospect - vset).union(prospect - cset) + currstart = None + prevnode = None + chunksize = 0 + prospect = sorted(prospect, key=humanify_nodename) + currstart = prospect[0] + while prospect: + currnode = prospect.pop(0) + if currnode in discontinuities: + if chunksize == 0: + continue + elif chunksize == 1: + ranges.append(prevnode) + elif chunksize == 2: + ranges.append(','.join([currstart, prevnode])) + else: + ranges.append(':'.join([currstart, prevnode])) + chunksize = 0 + currstart = None + continue + elif not currstart: + currstart = currnode + chunksize += 1 + prevnode = currnode + if chunksize == 1: + ranges.append(prevnode) + elif chunksize == 2: + ranges.append(','.join([currstart, prevnode])) + elif chunksize != 0: + ranges.append(':'.join([currstart, prevnode])) + return ranges + class ReverseNodeRange(object): """Abbreviate a set of nodes to a shorter noderange representation @@ -71,7 +157,8 @@ class ReverseNodeRange(object): @property def noderange(self): subsetgroups = [] - for group in self.cfm.get_groups(sizesort=True): + allgroups = self.cfm.get_groups(sizesort=True) + for group in allgroups: if lastnoderange: for nr in lastnoderange: if lastnoderange[nr] - self.nodes: @@ -88,7 +175,28 @@ class ReverseNodeRange(object): self.nodes -= nl if not self.nodes: break - return ','.join(sorted(subsetgroups) + sorted(self.nodes)) + # then, analyze sequentially identifying matching alpha subsections + # then try out noderange from beginning to end + # we 
need to know discontinuities, which are either: + # nodes that appear in the noderange that are not in the nodes + # nodes that do not exist at all (we need a noderange modification + # that returns non existing nodes) + ranges = [] + try: + subsetgroups.sort(key=humanify_nodename) + groupchunks = group_elements(subsetgroups) + for gc in groupchunks: + ranges.extend(abbreviate_chunk(gc, allgroups)) + except Exception: + subsetgroups.sort() + try: + nodes = sorted(self.nodes, key=humanify_nodename) + nodechunks = group_elements(nodes) + for nc in nodechunks: + ranges.extend(abbreviate_chunk(nc, self.cfm.list_nodes())) + except Exception: + ranges = sorted(self.nodes) + return ','.join(ranges) From fe27cdea4a8df9bd10de5a6dd340a3716413a9ce Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 9 Oct 2023 17:18:44 -0400 Subject: [PATCH 083/126] Abbreviate harder, using brackets Add a round that collapses as is convenient to bracketed range. --- confluent_server/confluent/noderange.py | 109 +++++++++++++++++++++++- 1 file changed, 106 insertions(+), 3 deletions(-) diff --git a/confluent_server/confluent/noderange.py b/confluent_server/confluent/noderange.py index 5021d592..e70fa6fe 100644 --- a/confluent_server/confluent/noderange.py +++ b/confluent_server/confluent/noderange.py @@ -64,6 +64,105 @@ def unnumber_nodename(nodename): def getnumbers_nodename(nodename): return [int(x) for x in re.split(numregex, nodename) if x.isdigit()] + +class Bracketer(object): + __slots__ = ['sequences', 'count', 'nametmpl', 'diffn', 'tokens'] + + def __init__(self, nodename): + self.sequences = [] + realnodename = nodename + if ':' in nodename: + realnodename = nodename.split(':', 1)[0] + self.count = len(getnumbers_nodename(realnodename)) + self.nametmpl = unnumber_nodename(realnodename) + for n in range(self.count): + self.sequences.append(None) + self.diffn = None + self.tokens = [] + self.extend(nodename) + + def extend(self, nodeorseq): + # can only differentiate a single number 
+ endname = None + endnums = None + enddifn = self.diffn + if ':' in nodeorseq: + nodename, endname = nodeorseq.split(':', 1) + else: + nodename = nodeorseq + nums = getnumbers_nodename(nodename) + if endname: + diffcount = 0 + endnums = getnumbers_nodename(endname) + ecount = len(endnums) + if ecount != self.count: + raise Exception("mismatched names passed") + for n in range(ecount): + if endnums[n] != nums[n]: + enddifn = n + diffcount += 1 + if diffcount > 1: + if self.sequences: + self.flush_current() + self.tokens.append(nodeorseq) # TODO: could just abbreviate this with multiple []... + return + for n in range(self.count): + if endnums and endnums[n] != nums[n]: + outval = '{}:{}'.format(nums[n], endnums[n]) + else: + outval = '{}'.format(nums[n]) + if self.sequences[n] is None: + # We initialize to text pieces, 'currstart', and 'prev' number + self.sequences[n] = [[outval], nums[n], nums[n]] + elif self.sequences[n][2] == nums[n]: + continue # new nodename has no new number, keep going + elif self.sequences[n][2] != nums[n]: + if self.diffn is not None and (n != self.diffn or enddifn != n): + self.flush_current() + self.sequences[n] = [[], nums[n], nums[n]] + self.diffn = n + self.sequences[n][0].append(outval) + self.sequences[n][2] = nums[n] + elif False: # previous attempt + # A discontinuity, need to close off previous chunk + currstart = self.sequences[n][1] + prevnum = self.sequences[n][2] + if currstart == prevnum: + self.sequences[n][0].append('{}'.format(currstart)) + elif prevnum == currstart + 1: + self.sequences[n][0].append('{},{}'.format(currstart, prevnum)) + else: + self.sequences[n][0].append('{}:{}'.format(currstart, prevnum)) + self.sequences[n][1] = nums[n] + self.sequences[n][2] = nums[n] + elif False: # self.sequences[n][2] == nums[n] - 1: # sequential, increment prev + self.sequences[n][2] = nums[n] + else: + raise Exception('Decreasing node in extend call, not supported') + + def flush_current(self): + txtfields = [] + for n in 
range(self.count): + txtfield = ','.join(self.sequences[n][0]) + #if self.sequences[n][1] == self.sequences[n][2]: + # txtfield.append('{}'.format(self.sequences[n][1])) + #else: + # txtfield.append('{}:{}'.format(self.sequences[n][1], self.sequences[n][2])) + if txtfield.isdigit(): + txtfields.append(txtfield) + else: + txtfields.append('[{}]'.format(txtfield)) + self.tokens.append(''.join(self.nametmpl).format(*txtfields)) + self.sequences = [] + for n in range(self.count): + self.sequences.append(None) + + @property + def range(self): + if self.sequences: + self.flush_current() + return ','.join(self.tokens) + def group_elements(elems): """ Take the specefied elements and chunk them according to text similarity """ @@ -123,7 +222,7 @@ def abbreviate_chunk(chunk, validset): elif chunksize == 1: ranges.append(prevnode) elif chunksize == 2: - ranges.append(','.join([currstart, prevnode])) + ranges.extend([currstart, prevnode]) else: ranges.append(':'.join([currstart, prevnode])) chunksize = 0 @@ -136,7 +235,7 @@ def abbreviate_chunk(chunk, validset): if chunksize == 1: ranges.append(prevnode) elif chunksize == 2: - ranges.append(','.join([currstart, prevnode])) + ranges.extend([currstart, prevnode]) elif chunksize != 0: ranges.append(':'.join([currstart, prevnode])) return ranges @@ -193,7 +292,11 @@ class ReverseNodeRange(object): nodes = sorted(self.nodes, key=humanify_nodename) nodechunks = group_elements(nodes) for nc in nodechunks: - ranges.extend(abbreviate_chunk(nc, self.cfm.list_nodes())) + currchunks = abbreviate_chunk(nc, self.cfm.list_nodes()) + bracketer = Bracketer(currchunks[0]) + for chnk in currchunks[1:]: + bracketer.extend(chnk) + ranges.append(bracketer.range) except Exception: ranges = sorted(self.nodes) return ','.join(ranges) From c254564f021264b7de200507882f8a57715a6926 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 10 Oct 2023 12:47:19 -0400 Subject: [PATCH 084/126] Fully give up on multi-iterator abbreviation There's too many 
cases that can go wrong. Note that with this lower ambition, it would be possible to significantly streamline the implementation. Notably, the 'find discontinuities' approach was selected to *try* to support multiple iterators, but since that didn't pan out, a more straightforward numerical strategy can be used from the onset. --- confluent_server/confluent/noderange.py | 78 ++++++++++++++++--------- 1 file changed, 51 insertions(+), 27 deletions(-) diff --git a/confluent_server/confluent/noderange.py b/confluent_server/confluent/noderange.py index e70fa6fe..e2f88b74 100644 --- a/confluent_server/confluent/noderange.py +++ b/confluent_server/confluent/noderange.py @@ -100,11 +100,19 @@ class Bracketer(object): for n in range(ecount): if endnums[n] != nums[n]: enddifn = n + if self.diffn is None: + self.diffn = enddifn diffcount += 1 - if diffcount > 1: + if diffcount > 1 or enddifn != self.diffn: if self.sequences: self.flush_current() - self.tokens.append(nodeorseq) # TODO: could just abbreviate this with multiple []... 
+ txtfields = [] + for idx in range(len(nums)): + if endnums[idx] == nums[idx]: + txtfields.append(nums[idx]) + else: + txtfields.append('[{}:{}]'.format(nums[idx], endnums[idx])) + self.tokens.append(''.join(self.nametmpl).format(*txtfields)) return for n in range(self.count): if endnums and endnums[n] != nums[n]: @@ -142,17 +150,18 @@ class Bracketer(object): def flush_current(self): txtfields = [] - for n in range(self.count): - txtfield = ','.join(self.sequences[n][0]) - #if self.sequences[n][1] == self.sequences[n][2]: - # txtfield.append('{}'.format(self.sequences[n][1])) - #else: - # txtfield.append('{}:{}'.format(self.sequences[n][1], self.sequences[n][2])) - if txtfield.isdigit(): - txtfields.append(txtfield) - else: - txtfields.append('[{}]'.format(txtfield)) - self.tokens.append(''.join(self.nametmpl).format(*txtfields)) + if self.sequences and self.sequences[0] is not None: + for n in range(self.count): + txtfield = ','.join(self.sequences[n][0]) + #if self.sequences[n][1] == self.sequences[n][2]: + # txtfield.append('{}'.format(self.sequences[n][1])) + #else: + # txtfield.append('{}:{}'.format(self.sequences[n][1], self.sequences[n][2])) + if txtfield.isdigit(): + txtfields.append(txtfield) + else: + txtfields.append('[{}]'.format(txtfield)) + self.tokens.append(''.join(self.nametmpl).format(*txtfields)) self.sequences = [] for n in range(self.count): self.sequences.append(None) @@ -186,28 +195,41 @@ def group_elements(elems): def abbreviate_chunk(chunk, validset): if len(chunk) < 3: return sorted(chunk, key=humanify_nodename) - #chunk = sorted(chunk, key=humanify_nodename) vset = set(validset) cset = set(chunk) - mins = None - maxs = None + minmaxes = [None] + diffn = None + prevns = None for name in chunk: currns = getnumbers_nodename(name) - if mins is None: - mins = list(currns) - maxs = list(currns) + if minmaxes[-1] is None: + minmaxes[-1] = [list(currns), list(currns)] continue + if prevns is None: + prevns = currns for n in range(len(currns)): 
- if currns[n] < mins[n]: - mins[n] = currns[n] - if currns[n] > maxs[n]: - maxs[n] = currns[n] + if prevns[n] != currns[n]: + if diffn is None: + diffn = n + elif diffn != n: + minmaxes.append([list(currns), list(currns)]) + continue + if currns[n] < minmaxes[-1][0][n]: + minmaxes.append([list(currns), list(currns)]) + if currns[n] > minmaxes[-1][1][n]: + minmaxes[-1][1][n] = currns[n] + prevns = currns tmplt = ''.join(unnumber_nodename(chunk[0])) + ranges = [] + for x in minmaxes: + process_abbreviation(vset, cset, x[0], x[1], tmplt, ranges) + return ranges + +def process_abbreviation(vset, cset, mins, maxs, tmplt, ranges): bgnr = tmplt.format(*mins) endr = tmplt.format(*maxs) nr = '{}:{}'.format(bgnr, endr) prospect = NodeRange(nr).nodes - ranges = [] discontinuities = (prospect - vset).union(prospect - cset) currstart = None prevnode = None @@ -238,8 +260,6 @@ def abbreviate_chunk(chunk, validset): ranges.extend([currstart, prevnode]) elif chunksize != 0: ranges.append(':'.join([currstart, prevnode])) - return ranges - class ReverseNodeRange(object): """Abbreviate a set of nodes to a shorter noderange representation @@ -285,7 +305,11 @@ class ReverseNodeRange(object): subsetgroups.sort(key=humanify_nodename) groupchunks = group_elements(subsetgroups) for gc in groupchunks: - ranges.extend(abbreviate_chunk(gc, allgroups)) + currchunks = abbreviate_chunk(gc, allgroups) + bracketer = Bracketer(currchunks[0]) + for chnk in currchunks[1:]: + bracketer.extend(chnk) + ranges.append(bracketer.range) except Exception: subsetgroups.sort() try: From e9a2f57ad8d262dae5da54bdddfc1906a0e652a4 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 10 Oct 2023 16:56:32 -0400 Subject: [PATCH 085/126] Simplify the noderange abbreviation Since the multi-iterator ambition is out, ditch the expensive set wrangling step. 
Now the procedure is: -Suck nodes into groups, as possible -Separately for groups and nodes: -Sort the elements -Chunk the elements based on 'non-numberical' situation matching -analyze the iterators to apply [] to shorten the name -Multi-iterator will cause a discontinuity, and a new ',' delimited name gets constructed --- confluent_server/confluent/noderange.py | 146 ++++-------------------- 1 file changed, 23 insertions(+), 123 deletions(-) diff --git a/confluent_server/confluent/noderange.py b/confluent_server/confluent/noderange.py index e2f88b74..9a7ed44b 100644 --- a/confluent_server/confluent/noderange.py +++ b/confluent_server/confluent/noderange.py @@ -80,70 +80,39 @@ class Bracketer(object): self.diffn = None self.tokens = [] self.extend(nodename) + if self.count == 0: + self.tokens = [nodename] def extend(self, nodeorseq): # can only differentiate a single number endname = None endnums = None - enddifn = self.diffn if ':' in nodeorseq: nodename, endname = nodeorseq.split(':', 1) else: nodename = nodeorseq nums = getnumbers_nodename(nodename) - if endname: - diffcount = 0 - endnums = getnumbers_nodename(endname) - ecount = len(endnums) - if ecount != self.count: - raise Exception("mismatched names passed") - for n in range(ecount): - if endnums[n] != nums[n]: - enddifn = n - if self.diffn is None: - self.diffn = enddifn - diffcount += 1 - if diffcount > 1 or enddifn != self.diffn: - if self.sequences: - self.flush_current() - txtfields = [] - for idx in range(len(nums)): - if endnums[idx] == nums[idx]: - txtfields.append(nums[idx]) - else: - txtfields.append('[{}:{}]'.format(nums[idx], endnums[idx])) - self.tokens.append(''.join(self.nametmpl).format(*txtfields)) - return for n in range(self.count): - if endnums and endnums[n] != nums[n]: - outval = '{}:{}'.format(nums[n], endnums[n]) - else: - outval = '{}'.format(nums[n]) if self.sequences[n] is None: # We initialize to text pieces, 'currstart', and 'prev' number - self.sequences[n] = [[outval], 
nums[n], nums[n]] + self.sequences[n] = [[], nums[n], nums[n]] elif self.sequences[n][2] == nums[n]: continue # new nodename has no new number, keep going elif self.sequences[n][2] != nums[n]: - if self.diffn is not None and (n != self.diffn or enddifn != n): + if self.diffn is not None and n != self.diffn: self.flush_current() self.sequences[n] = [[], nums[n], nums[n]] - self.diffn = n - self.sequences[n][0].append(outval) - self.sequences[n][2] = nums[n] - elif False: # previous attempt - # A discontinuity, need to close off previous chunk - currstart = self.sequences[n][1] - prevnum = self.sequences[n][2] - if currstart == prevnum: - self.sequences[n][0].append('{}'.format(currstart)) - elif prevnum == currstart + 1: - self.sequences[n][0].append('{},{}'.format(currstart, prevnum)) + self.diffn = None else: - self.sequences[n][0].append('{}:{}'.format(currstart, prevnum)) - self.sequences[n][1] = nums[n] - self.sequences[n][2] = nums[n] - elif False: # self.sequences[n][2] == nums[n] - 1: # sequential, increment prev + self.diffn = n + if self.sequences[n][2] == (nums[n] - 1): + self.sequences[n][2] = nums[n] + elif self.sequences[n][2] < (nums[n] - 1): + if self.sequences[n][2] != self.sequences[n][1]: + self.sequences[n][0].append('{}:{}'.format(self.sequences[n][1], self.sequences[n][2])) + else: + self.sequences[n][0].append('{}'.format(self.sequences[n][1])) + self.sequences[n][1] = nums[n] self.sequences[n][2] = nums[n] else: raise Exception('Decreasing node in extend call, not supported') @@ -152,11 +121,11 @@ class Bracketer(object): txtfields = [] if self.sequences and self.sequences[0] is not None: for n in range(self.count): + if self.sequences[n][1] == self.sequences[n][2]: + self.sequences[n][0].append('{}'.format(self.sequences[n][1])) + else: + self.sequences[n][0].append('{}:{}'.format(self.sequences[n][1], self.sequences[n][2])) txtfield = ','.join(self.sequences[n][0]) - #if self.sequences[n][1] == self.sequences[n][2]: - # 
txtfield.append('{}'.format(self.sequences[n][1])) - #else: - # txtfield.append('{}:{}'.format(self.sequences[n][1], self.sequences[n][2])) if txtfield.isdigit(): txtfields.append(txtfield) else: @@ -172,6 +141,7 @@ class Bracketer(object): self.flush_current() return ','.join(self.tokens) + def group_elements(elems): """ Take the specefied elements and chunk them according to text similarity """ @@ -192,74 +162,6 @@ def group_elements(elems): prev = elemtxt return chunked_elems -def abbreviate_chunk(chunk, validset): - if len(chunk) < 3: - return sorted(chunk, key=humanify_nodename) - vset = set(validset) - cset = set(chunk) - minmaxes = [None] - diffn = None - prevns = None - for name in chunk: - currns = getnumbers_nodename(name) - if minmaxes[-1] is None: - minmaxes[-1] = [list(currns), list(currns)] - continue - if prevns is None: - prevns = currns - for n in range(len(currns)): - if prevns[n] != currns[n]: - if diffn is None: - diffn = n - elif diffn != n: - minmaxes.append([list(currns), list(currns)]) - continue - if currns[n] < minmaxes[-1][0][n]: - minmaxes.append([list(currns), list(currns)]) - if currns[n] > minmaxes[-1][1][n]: - minmaxes[-1][1][n] = currns[n] - prevns = currns - tmplt = ''.join(unnumber_nodename(chunk[0])) - ranges = [] - for x in minmaxes: - process_abbreviation(vset, cset, x[0], x[1], tmplt, ranges) - return ranges - -def process_abbreviation(vset, cset, mins, maxs, tmplt, ranges): - bgnr = tmplt.format(*mins) - endr = tmplt.format(*maxs) - nr = '{}:{}'.format(bgnr, endr) - prospect = NodeRange(nr).nodes - discontinuities = (prospect - vset).union(prospect - cset) - currstart = None - prevnode = None - chunksize = 0 - prospect = sorted(prospect, key=humanify_nodename) - currstart = prospect[0] - while prospect: - currnode = prospect.pop(0) - if currnode in discontinuities: - if chunksize == 0: - continue - elif chunksize == 1: - ranges.append(prevnode) - elif chunksize == 2: - ranges.extend([currstart, prevnode]) - else: - 
ranges.append(':'.join([currstart, prevnode])) - chunksize = 0 - currstart = None - continue - elif not currstart: - currstart = currnode - chunksize += 1 - prevnode = currnode - if chunksize == 1: - ranges.append(prevnode) - elif chunksize == 2: - ranges.extend([currstart, prevnode]) - elif chunksize != 0: - ranges.append(':'.join([currstart, prevnode])) class ReverseNodeRange(object): """Abbreviate a set of nodes to a shorter noderange representation @@ -305,9 +207,8 @@ class ReverseNodeRange(object): subsetgroups.sort(key=humanify_nodename) groupchunks = group_elements(subsetgroups) for gc in groupchunks: - currchunks = abbreviate_chunk(gc, allgroups) - bracketer = Bracketer(currchunks[0]) - for chnk in currchunks[1:]: + bracketer = Bracketer(gc[0]) + for chnk in gc[1:]: bracketer.extend(chnk) ranges.append(bracketer.range) except Exception: @@ -316,9 +217,8 @@ class ReverseNodeRange(object): nodes = sorted(self.nodes, key=humanify_nodename) nodechunks = group_elements(nodes) for nc in nodechunks: - currchunks = abbreviate_chunk(nc, self.cfm.list_nodes()) - bracketer = Bracketer(currchunks[0]) - for chnk in currchunks[1:]: + bracketer = Bracketer(nc[0]) + for chnk in nc[1:]: bracketer.extend(chnk) ranges.append(bracketer.range) except Exception: From 2d906e188628aeb3255915a86864135b8d96de7d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 11 Oct 2023 10:15:24 -0400 Subject: [PATCH 086/126] Fix handling of pre-existing array --- misc/swraid | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/misc/swraid b/misc/swraid index 3e234dc8..836f1fb1 100644 --- a/misc/swraid +++ b/misc/swraid @@ -1,6 +1,6 @@ DEVICES="/dev/sda /dev/sdb" RAIDLEVEL=1 -mdadm --detail /dev/md*|grep 'Version : 1.0' >& /dev/null && exit 0 +mdadm --detail /dev/md*|grep 'Version : 1.0' >& /dev/null || ( lvm vgchange -a n mdadm -S -s NUMDEVS=$(for dev in $DEVICES; do @@ -14,5 +14,6 @@ mdadm -C /dev/md/raid $DEVICES -n $NUMDEVS -e 1.0 -l $RAIDLEVEL # shut and restart array to 
prime things for anaconda mdadm -S -s mdadm --assemble --scan +) readlink /dev/md/raid|sed -e 's/.*\///' > /tmp/installdisk From 6e4d9d9eb485107f327eedf6477d7c5d7308f4ab Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 12 Oct 2023 14:46:09 -0400 Subject: [PATCH 087/126] Address potential slowdowns by misbehaving DNS For one, shorten the DNS timeout, if the DNS server is completely out, give up quickly. For another, if a host has a large number of net.X.hostnames, the sequential nature was intolerable. Have each network be evaluated in a greenthread concurrently to serve the DNS latency concurrently. --- confluent_server/confluent/netutil.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/netutil.py b/confluent_server/confluent/netutil.py index 37e8d198..9e9fd597 100644 --- a/confluent_server/confluent/netutil.py +++ b/confluent_server/confluent/netutil.py @@ -25,6 +25,9 @@ import eventlet.support.greendns import os getaddrinfo = eventlet.support.greendns.getaddrinfo +eventlet.support.greendns.resolver.clear() +eventlet.support.greendns.resolver._resolver.lifetime = 1 + def msg_align(len): return (len + 3) & ~3 @@ -333,11 +336,13 @@ def get_full_net_config(configmanager, node, serverip=None): myaddrs = get_addresses_by_serverip(serverip) nm = NetManager(myaddrs, node, configmanager) defaultnic = {} + ppool = eventlet.greenpool.GreenPool(64) if None in attribs: - nm.process_attribs(None, attribs[None]) + ppool.spawn(nm.process_attribs, None, attribs[None]) del attribs[None] for netname in sorted(attribs): - nm.process_attribs(netname, attribs[netname]) + ppool.spawn(nm.process_attribs, netname, attribs[netname]) + ppool.waitall() retattrs = {} if None in nm.myattribs: retattrs['default'] = nm.myattribs[None] From 3a6932ea6dab02a69c03a2a8f30e3a4e7623e5e2 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 12 Oct 2023 15:28:54 -0400 Subject: [PATCH 088/126] Start tracking padding during abbreviation 
This will take care of padding when padding is consistent across a range. However, we still have a problem with a progression like: 01 02 ... 98 099 100 Where numbers in the middle start getting padding unexpectedly without a leading digit. --- confluent_server/confluent/noderange.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/confluent_server/confluent/noderange.py b/confluent_server/confluent/noderange.py index 9a7ed44b..1c023707 100644 --- a/confluent_server/confluent/noderange.py +++ b/confluent_server/confluent/noderange.py @@ -62,14 +62,15 @@ def unnumber_nodename(nodename): return chunked def getnumbers_nodename(nodename): - return [int(x) for x in re.split(numregex, nodename) if x.isdigit()] + return [x for x in re.split(numregex, nodename) if x.isdigit()] class Bracketer(object): - __slots__ = ['sequences', 'count', 'nametmpl', 'diffn', 'tokens'] + __slots__ = ['sequences', 'count', 'nametmpl', 'diffn', 'tokens', 'numlens'] def __init__(self, nodename): self.sequences = [] + self.numlens = [] realnodename = nodename if ':' in nodename: realnodename = nodename.split(':', 1)[0] @@ -77,6 +78,7 @@ class Bracketer(object): self.nametmpl = unnumber_nodename(realnodename) for n in range(self.count): self.sequences.append(None) + self.numlens.append([0, 0]) self.diffn = None self.tokens = [] self.extend(nodename) @@ -84,6 +86,7 @@ class Bracketer(object): self.tokens = [nodename] def extend(self, nodeorseq): + # crap... 
failed to preserve 0 padding foro fixe width # can only differentiate a single number endname = None endnums = None @@ -91,29 +94,37 @@ class Bracketer(object): nodename, endname = nodeorseq.split(':', 1) else: nodename = nodeorseq - nums = getnumbers_nodename(nodename) + txtnums = getnumbers_nodename(nodename) + nums = [int(x) for x in txtnums] for n in range(self.count): if self.sequences[n] is None: # We initialize to text pieces, 'currstart', and 'prev' number self.sequences[n] = [[], nums[n], nums[n]] + self.numlens[n] = [len(txtnums[n]), len(txtnums[n])] elif self.sequences[n][2] == nums[n]: continue # new nodename has no new number, keep going elif self.sequences[n][2] != nums[n]: if self.diffn is not None and n != self.diffn: self.flush_current() self.sequences[n] = [[], nums[n], nums[n]] + self.numlens[n] = [len(txtnums[n]), len(txtnums[n])] self.diffn = None else: self.diffn = n if self.sequences[n][2] == (nums[n] - 1): self.sequences[n][2] = nums[n] + self.numlens[n][1] = len(txtnums[n]) elif self.sequences[n][2] < (nums[n] - 1): if self.sequences[n][2] != self.sequences[n][1]: - self.sequences[n][0].append('{}:{}'.format(self.sequences[n][1], self.sequences[n][2])) + fmtstr = '{{:0{}d}}:{{:0{}d}}'.format(*self.numlens[n]) + self.sequences[n][0].append(fmtstr.format(self.sequences[n][1], self.sequences[n][2])) else: - self.sequences[n][0].append('{}'.format(self.sequences[n][1])) + fmtstr = '{{:0{}d}}'.format(self.numlens[n][0]) + self.sequences[n][0].append(fmtstr.format(self.sequences[n][1])) self.sequences[n][1] = nums[n] + self.numlens[n][0] = len(txtnums[n]) self.sequences[n][2] = nums[n] + self.numlens[n][1] = len(txtnums[n]) else: raise Exception('Decreasing node in extend call, not supported') From bfbb7c2843e4353c0342b0b7bb1a4bc40beda534 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 12 Oct 2023 16:09:40 -0400 Subject: [PATCH 089/126] Handle mid-range pad changing, and identical names with only pad difference This would be painful to 
operate, but if done at least reverse noderange will faithfully honor it now. --- confluent_server/confluent/noderange.py | 26 +++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/confluent_server/confluent/noderange.py b/confluent_server/confluent/noderange.py index 1c023707..e76391e8 100644 --- a/confluent_server/confluent/noderange.py +++ b/confluent_server/confluent/noderange.py @@ -86,7 +86,6 @@ class Bracketer(object): self.tokens = [nodename] def extend(self, nodeorseq): - # crap... failed to preserve 0 padding foro fixe width # can only differentiate a single number endname = None endnums = None @@ -97,23 +96,26 @@ class Bracketer(object): txtnums = getnumbers_nodename(nodename) nums = [int(x) for x in txtnums] for n in range(self.count): + padto = len(txtnums[n]) + needpad = (padto != len('{}'.format(nums[n]))) if self.sequences[n] is None: # We initialize to text pieces, 'currstart', and 'prev' number self.sequences[n] = [[], nums[n], nums[n]] self.numlens[n] = [len(txtnums[n]), len(txtnums[n])] - elif self.sequences[n][2] == nums[n]: + elif self.sequences[n][2] == nums[n] and self.numlens[n][1] == padto: continue # new nodename has no new number, keep going - elif self.sequences[n][2] != nums[n]: - if self.diffn is not None and n != self.diffn: + else: # if self.sequences[n][2] != nums[n] or : + if self.diffn is not None and (n != self.diffn or + (needpad and padto != self.numlens[n][1])): self.flush_current() self.sequences[n] = [[], nums[n], nums[n]] - self.numlens[n] = [len(txtnums[n]), len(txtnums[n])] + self.numlens[n] = [padto, padto] self.diffn = None else: self.diffn = n if self.sequences[n][2] == (nums[n] - 1): self.sequences[n][2] = nums[n] - self.numlens[n][1] = len(txtnums[n]) + self.numlens[n][1] = padto elif self.sequences[n][2] < (nums[n] - 1): if self.sequences[n][2] != self.sequences[n][1]: fmtstr = '{{:0{}d}}:{{:0{}d}}'.format(*self.numlens[n]) @@ -122,20 +124,20 @@ class Bracketer(object): fmtstr = 
'{{:0{}d}}'.format(self.numlens[n][0]) self.sequences[n][0].append(fmtstr.format(self.sequences[n][1])) self.sequences[n][1] = nums[n] - self.numlens[n][0] = len(txtnums[n]) + self.numlens[n][0] = padto self.sequences[n][2] = nums[n] - self.numlens[n][1] = len(txtnums[n]) - else: - raise Exception('Decreasing node in extend call, not supported') + self.numlens[n][1] = padto def flush_current(self): txtfields = [] if self.sequences and self.sequences[0] is not None: for n in range(self.count): if self.sequences[n][1] == self.sequences[n][2]: - self.sequences[n][0].append('{}'.format(self.sequences[n][1])) + fmtstr = '{{:0{}d}}'.format(self.numlens[n][0]) + self.sequences[n][0].append(fmtstr.format(self.sequences[n][1])) else: - self.sequences[n][0].append('{}:{}'.format(self.sequences[n][1], self.sequences[n][2])) + fmtstr = '{{:0{}d}}:{{:0{}d}}'.format(*self.numlens[n]) + self.sequences[n][0].append(fmtstr.format(self.sequences[n][1], self.sequences[n][2])) txtfield = ','.join(self.sequences[n][0]) if txtfield.isdigit(): txtfields.append(txtfield) From 0434f38ea12035b98e6997fa06c755bb55694bc9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 13 Oct 2023 15:25:08 -0400 Subject: [PATCH 090/126] Add iterm and kitty image support to stats This delivers improved graphics speed and quality for selected terminals. --- confluent_client/bin/stats | 45 ++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/confluent_client/bin/stats b/confluent_client/bin/stats index 7158e7a7..94af75db 100755 --- a/confluent_client/bin/stats +++ b/confluent_client/bin/stats @@ -16,13 +16,10 @@ # limitations under the License. 
import argparse +import base64 import csv -import fcntl import io import numpy as np - -import os -import subprocess import sys try: @@ -35,7 +32,31 @@ except ImportError: pass -def plot(gui, output, plotdata, bins): +def iterm_draw(data): + databuf = data.getbuffer() + datalen = len(databuf) + data = base64.b64encode(databuf).decode('utf8') + sys.stdout.write( + '\x1b]1337;File=inline=1;size={}:'.format(datalen)) + sys.stdout.write(data) + sys.stdout.write('\a') + sys.stdout.write('\n') + sys.stdout.flush() + + +def kitty_draw(data): + data = base64.b64encode(data.getbuffer()) + while data: + chunk, data = data[:4096], data[4096:] + m = 1 if data else 0 + sys.stdout.write('\x1b_Ga=T,f=100,m={};'.format(m)) + sys.stdout.write(chunk.decode('utf8')) + sys.stdout.write('\x1b\\') + sys.stdout.flush() + sys.stdout.write('\n') + + +def plot(gui, output, plotdata, bins, fmt): import matplotlib as mpl if gui and mpl.get_backend() == 'agg': sys.stderr.write('Error: No GUI backend available and -g specified!\n') @@ -51,8 +72,13 @@ def plot(gui, output, plotdata, bins): tdata = io.BytesIO() plt.savefig(tdata) if not gui and not output: - writer = DumbWriter() - writer.draw(tdata) + if fmt == 'sixel': + writer = DumbWriter() + writer.draw(tdata) + elif fmt == 'kitty': + kitty_draw(tdata) + elif fmt == 'iterm': + iterm_draw(tdata) return n, bins def textplot(plotdata, bins): @@ -81,7 +107,8 @@ histogram = False aparser = argparse.ArgumentParser(description='Quick access to common statistics') aparser.add_argument('-c', type=int, default=0, help='Column number to analyze (default is last column)') aparser.add_argument('-d', default=None, help='Value used to separate columns') -aparser.add_argument('-x', default=False, action='store_true', help='Output histogram in sixel format') +aparser.add_argument('-x', default=False, action='store_true', help='Output histogram in graphical format') +aparser.add_argument('-f', default='sixel', help='Format for histogram output 
(sixel/iterm/kitty)') aparser.add_argument('-s', default=0, help='Number of header lines to skip before processing') aparser.add_argument('-g', default=False, action='store_true', help='Open histogram in separate graphical window') aparser.add_argument('-o', default=None, help='Output histogram to the specified filename in PNG format') @@ -138,7 +165,7 @@ while data: data = list(csv.reader([data], delimiter=delimiter))[0] n = None if args.g or args.o or args.x: - n, bins = plot(args.g, args.o, plotdata, bins=args.b) + n, bins = plot(args.g, args.o, plotdata, bins=args.b, fmt=args.f) if args.t: n, bins = textplot(plotdata, bins=args.b) print('Samples: {5} Min: {3} Median: {0} Mean: {1} Max: {4} StandardDeviation: {2} Sum: {6}'.format(np.median(plotdata), np.mean(plotdata), np.std(plotdata), np.min(plotdata), np.max(plotdata), len(plotdata), np.sum(plotdata))) From 06d18cec63e2da6ddf3b0fe3f3bdc4bf0d0412aa Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 16 Oct 2023 08:29:45 -0400 Subject: [PATCH 091/126] Fix abbreviation when pad decreases This is a bizarre way to work, but it should be valid. 
--- confluent_server/confluent/noderange.py | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_server/confluent/noderange.py b/confluent_server/confluent/noderange.py index e76391e8..dcf0a1cb 100644 --- a/confluent_server/confluent/noderange.py +++ b/confluent_server/confluent/noderange.py @@ -106,6 +106,7 @@ class Bracketer(object): continue # new nodename has no new number, keep going else: # if self.sequences[n][2] != nums[n] or : if self.diffn is not None and (n != self.diffn or + (padto < self.numlens[n][1]) or (needpad and padto != self.numlens[n][1])): self.flush_current() self.sequences[n] = [[], nums[n], nums[n]] From b91a19418453b902c4b066e30fd89194c069d6d2 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 17 Oct 2023 16:29:30 -0400 Subject: [PATCH 092/126] Improve selfservice performance with yaml The yaml python default behavior is 'pure python' and is tortuously slow. As a test, yaml dump of a 17,000 element list took 70 seconds in default configuration. Opting into the C functions, that time comes down to 10 seconds, a nice and easy improvement for generic yaml. For dumping a simple dumb list (e.g. the nodelist for ssh), a special case yaml-looking result is done, which hits 0.4 seconds on that same test. So this special case is added to nodelist, which can be very long and very in demand at the same time. 
--- confluent_server/confluent/selfservice.py | 32 +++++++++++++++++++---- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/confluent_server/confluent/selfservice.py b/confluent_server/confluent/selfservice.py index 04030491..cd4180c7 100644 --- a/confluent_server/confluent/selfservice.py +++ b/confluent_server/confluent/selfservice.py @@ -19,6 +19,12 @@ import json import os import time import yaml +try: + from yaml import CSafeDumper as SafeDumper + from yaml import CSafeLoader as SafeLoader +except ImportError: + from yaml import SafeLoader + from yaml import SafeDumper import confluent.discovery.protocols.ssdp as ssdp import eventlet webclient = eventlet.import_patched('pyghmi.util.webclient') @@ -31,7 +37,20 @@ currtzvintage = None def yamldump(input): - return yaml.safe_dump(input, default_flow_style=False) + return yaml.dump_all([input], Dumper=SafeDumper, default_flow_style=False) + +def yamlload(input): + return yaml.load(input, Loader=SafeLoader) + +def listdump(input): + # special case yaml for flat dumb list + # this is about 25x faster than doing full yaml dump even with CSafeDumper + # with a 17,000 element list + retval = '' + for entry in input: + retval += '- ' + entry + '\n' + return retval + def get_extra_names(nodename, cfg, myip=None): names = set([]) @@ -402,10 +421,13 @@ def handle_request(env, start_response): yield node + '\n' else: start_response('200 OK', (('Content-Type', retype),)) - yield dumper(list(util.natural_sort(nodes))) + if retype == 'application/yaml': + yield listdump(list(util.natural_sort(nodes))) + else: + yield dumper(list(util.natural_sort(nodes))) elif env['PATH_INFO'] == '/self/remoteconfigbmc' and reqbody: try: - reqbody = yaml.safe_load(reqbody) + reqbody = yamlload(reqbody) except Exception: reqbody = None cfgmod = reqbody.get('configmod', 'unspecified') @@ -419,7 +441,7 @@ def handle_request(env, start_response): start_response('200 Ok', ()) yield 'complete' elif env['PATH_INFO'] == 
'/self/updatestatus' and reqbody: - update = yaml.safe_load(reqbody) + update = yamlload(reqbody) statusstr = update.get('state', None) statusdetail = update.get('state_detail', None) didstateupdate = False @@ -522,7 +544,7 @@ def handle_request(env, start_response): '/var/lib/confluent/public/os/{0}/scripts/{1}') if slist: start_response('200 OK', (('Content-Type', 'application/yaml'),)) - yield yaml.safe_dump(util.natural_sort(slist), default_flow_style=False) + yield yamldump(util.natural_sort(slist)) else: start_response('200 OK', ()) yield '' From 8b150a904765f79fbe99d848a952b8513723f42e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 19 Oct 2023 09:25:57 -0400 Subject: [PATCH 093/126] Fix for post group failures A node failure after group failure would erase the group from range. Further, correct an issue where an empty nodeset would trigger a bad behavior. --- confluent_server/confluent/noderange.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/noderange.py b/confluent_server/confluent/noderange.py index dcf0a1cb..df4552b8 100644 --- a/confluent_server/confluent/noderange.py +++ b/confluent_server/confluent/noderange.py @@ -221,22 +221,27 @@ class ReverseNodeRange(object): subsetgroups.sort(key=humanify_nodename) groupchunks = group_elements(subsetgroups) for gc in groupchunks: + if not gc: + continue bracketer = Bracketer(gc[0]) for chnk in gc[1:]: bracketer.extend(chnk) ranges.append(bracketer.range) except Exception: subsetgroups.sort() + ranges.extend(subsetgroups) try: nodes = sorted(self.nodes, key=humanify_nodename) nodechunks = group_elements(nodes) for nc in nodechunks: + if not nc: + continue bracketer = Bracketer(nc[0]) for chnk in nc[1:]: bracketer.extend(chnk) ranges.append(bracketer.range) except Exception: - ranges = sorted(self.nodes) + ranges.extend(sorted(self.nodes)) return ','.join(ranges) From 063bfc17a57cad22e5fbc3f42d29458a5d271790 Mon Sep 17 00:00:00 2001 From: Jarrod 
Johnson Date: Thu, 19 Oct 2023 10:40:57 -0400 Subject: [PATCH 094/126] Start using container for final build process Makes supporting the base platform easier by largely ignoring the base platform. --- confluent_osdeploy/buildrpm-aarch64 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/confluent_osdeploy/buildrpm-aarch64 b/confluent_osdeploy/buildrpm-aarch64 index 867c0102..7f95852b 100644 --- a/confluent_osdeploy/buildrpm-aarch64 +++ b/confluent_osdeploy/buildrpm-aarch64 @@ -29,4 +29,5 @@ mv confluent_el8bin.tar.xz ~/rpmbuild/SOURCES/ mv confluent_el9bin.tar.xz ~/rpmbuild/SOURCES/ rm -rf el9bin rm -rf el8bin -rpmbuild -ba confluent_osdeploy-aarch64.spec +podman run --privileged --rm -v $HOME:/root el8builder rpmbuild -ba /root/confluent/confluent_osdeploy/confluent_osdeploy-aarch64.spec + From 913a26aec93b3c0d55dea1963e3c5a8fe46dae14 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 19 Oct 2023 10:42:39 -0400 Subject: [PATCH 095/126] Change to consistent CWD for osdeploy arm build --- confluent_osdeploy/buildrpm-aarch64 | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_osdeploy/buildrpm-aarch64 b/confluent_osdeploy/buildrpm-aarch64 index 7f95852b..83ffc519 100644 --- a/confluent_osdeploy/buildrpm-aarch64 +++ b/confluent_osdeploy/buildrpm-aarch64 @@ -1,3 +1,4 @@ +cd $(dirname $0) VERSION=`git describe|cut -d- -f 1` NUMCOMMITS=`git describe|cut -d- -f 2` if [ "$NUMCOMMITS" != "$VERSION" ]; then From 9c9d71882c76bbf4bf989b8c8dacced5d7878794 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 19 Oct 2023 15:51:40 -0400 Subject: [PATCH 096/126] Disable keepalive Unfortunately, apache can get a bit odd over how it reports a non-viable open socket for keepalive, which can happen in certain windows. Disable the keepalive feature and take some performance penalty in browsers for the sake of more consistent return behavior and fewer idle greenthreads doing nothing. 
--- confluent_server/confluent/httpapi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent/httpapi.py b/confluent_server/confluent/httpapi.py index 5a145a0c..f36f2c73 100644 --- a/confluent_server/confluent/httpapi.py +++ b/confluent_server/confluent/httpapi.py @@ -1084,7 +1084,7 @@ def serve(bind_host, bind_port): pass # we gave it our best shot there try: eventlet.wsgi.server(sock, resourcehandler, log=False, log_output=False, - debug=False, socket_timeout=60) + debug=False, socket_timeout=60, keepalive=False) except TypeError: # Older eventlet in place, skip arguments it does not understand eventlet.wsgi.server(sock, resourcehandler, log=False, debug=False) From 49a504972f2b17bce0ab396758b4dd4ac798e1af Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 25 Oct 2023 14:21:55 -0400 Subject: [PATCH 097/126] Fix syntax error in confignet --- confluent_osdeploy/common/profile/scripts/confignet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index f2a2edff..7e641205 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -435,7 +435,7 @@ if __name__ == '__main__': curridx = addr[-1] if curridx in doneidxs: continue - for tries in (1, 2 3): + for tries in (1, 2, 3): try: status, nc = apiclient.HTTPSClient(usejson=True, host=srv).grab_url_with_status('/confluent-api/self/netcfg') break @@ -446,7 +446,7 @@ if __name__ == '__main__': continue nc = json.loads(nc) if not dc: - for tries in (1, 2 3): + for tries in (1, 2, 3): try: status, dc = apiclient.HTTPSClient(usejson=True, host=srv).grab_url_with_status('/confluent-api/self/deploycfg2') break From 0857716f64a294b44de4ba883dd552af33800ff5 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 26 Oct 2023 08:58:37 -0400 Subject: [PATCH 098/126] Add support for normalized 
sensors This opens the door for normalized common sensors for clients that care about the semantics but cannot keep track of inconsistent sensor names from implementation to implementation. --- confluent_server/confluent/core.py | 14 ++++++++++++++ .../plugins/hardwaremanagement/ipmi.py | 19 +++++++++++++++++++ .../plugins/hardwaremanagement/redfish.py | 19 +++++++++++++++++++ 3 files changed, 52 insertions(+) diff --git a/confluent_server/confluent/core.py b/confluent_server/confluent/core.py index f70bc6ae..6ab6bd59 100644 --- a/confluent_server/confluent/core.py +++ b/confluent_server/confluent/core.py @@ -481,6 +481,20 @@ def _init_core(): 'pluginattrs': ['hardwaremanagement.method'], 'default': 'ipmi', }), + 'normalized': { + 'inlet_temp': PluginRoute({ + 'pluginattrs': ['hardwaremanagement.method'], + 'default': 'ipmi', + }), + 'average_cpu_temp': PluginRoute({ + 'pluginattrs': ['hardwaremanagement.method'], + 'default': 'ipmi', + }), + 'total_power': PluginRoute({ + 'pluginattrs': ['hardwaremanagement.method'], + 'default': 'ipmi', + }), + }, 'energy': PluginCollection({ 'pluginattrs': ['hardwaremanagement.method'], 'default': 'ipmi', diff --git a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py index 938b69ae..06a8c444 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py @@ -861,6 +861,23 @@ class IpmiHandler(object): resourcename = sensor['name'] self.ipmicmd.sensormap[simplify_name(resourcename)] = resourcename + def read_normalized(self, sensorname): + readings = None + if sensorname == 'average_cpu_temp': + cputemp = self.ipmicmd.get_average_processor_temperature() + readings = [cputemp] + elif sensorname == 'inlet_temp': + inltemp = self.ipmicmd.get_inlet_temperature() + readings = [inltemp] + elif sensorname == 'total_power': + sensor = EmptySensor('Total Power') + sensor.states = [] + 
sensor.units = 'W' + sensor.value = self.ipmicmd.get_system_power_watts() + readings = [sensor] + if readings: + self.output.put(msg.SensorReadings(readings, name=self.node)) + def read_sensors(self, sensorname): if sensorname == 'all': sensors = self.ipmicmd.get_sensor_descriptions() @@ -1157,6 +1174,8 @@ class IpmiHandler(object): if len(self.element) < 3: return self.sensorcategory = self.element[2] + if self.sensorcategory == 'normalized': + return self.read_normalized(self.element[-1]) # list sensors per category if len(self.element) == 3 and self.element[-2] == 'hardware': if self.sensorcategory == 'leds': diff --git a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py index 20315134..f53cc393 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/redfish.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/redfish.py @@ -712,6 +712,23 @@ class IpmiHandler(object): resourcename = sensor['name'] self.sensormap[simplify_name(resourcename)] = resourcename + def read_normalized(self, sensorname): + readings = None + if sensorname == 'average_cpu_temp': + cputemp = self.ipmicmd.get_average_processor_temperature() + readings = [cputemp] + elif sensorname == 'inlet_temp': + inltemp = self.ipmicmd.get_inlet_temperature() + readings = [inltemp] + elif sensorname == 'total_power': + sensor = EmptySensor('Total Power') + sensor.states = [] + sensor.units = 'W' + sensor.value = self.ipmicmd.get_system_power_watts() + readings = [sensor] + if readings: + self.output.put(msg.SensorReadings(readings, name=self.node)) + def read_sensors(self, sensorname): if sensorname == 'all': sensors = self.ipmicmd.get_sensor_descriptions() @@ -1012,6 +1029,8 @@ class IpmiHandler(object): if len(self.element) < 3: return self.sensorcategory = self.element[2] + if self.sensorcategory == 'normalized': + return self.read_normalized(self.element[-1]) # list sensors per category if 
len(self.element) == 3 and self.element[-2] == 'hardware': if self.sensorcategory == 'leds': From d0826106780c82c3b2c60441b3d3e6c85f256983 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 27 Oct 2023 13:34:52 -0400 Subject: [PATCH 099/126] Add more deep checking of node networking Whether due to the management node or node IP addresses, check if deployment can reasonably proceed using IPv4 or IPv6, and give a warning with some suggestions to check. Also, add nodeinventory -s as an example resolution for missing uuid. --- confluent_server/bin/confluent_selfcheck | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/confluent_server/bin/confluent_selfcheck b/confluent_server/bin/confluent_selfcheck index 1b504e95..cc1409cf 100755 --- a/confluent_server/bin/confluent_selfcheck +++ b/confluent_server/bin/confluent_selfcheck @@ -15,6 +15,7 @@ import confluent.sshutil as sshutil import confluent.certutil as certutil import confluent.client as client import confluent.config.configmanager as configmanager +import confluent.netutil as netutil import eventlet.green.subprocess as subprocess import tempfile import shutil @@ -244,7 +245,7 @@ if __name__ == '__main__': allok = False uuidok = True # not really, but suppress the spurious error dnsdomain = rsp.get('dns.domain', {}).get('value', '') - if ',' in dnsdomain or ' ' in dnsdomain: + if dnsdomain and (',' in dnsdomain or ' ' in dnsdomain): allok = False emprint(f'{args.node} has a dns.domain that appears to be a search instead of singular domain') uuidok = True # not really, but suppress the spurious error @@ -269,9 +270,28 @@ if __name__ == '__main__': switch_value = rsp[key].get('value',None) if switch_value and switch_value not in valid_nodes: emprint(f'{switch_value} is not a valid node name (as referenced by attribute "{key}" of node {args.node}).') + print(f"Checking network configuration for {args.node}") + cfg = configmanager.ConfigManager(None) + bootablev4nics = [] + 
bootablev6nics = [] + for nic in glob.glob("/sys/class/net/*/ifindex"): + idx = int(open(nic, "r").read()) + nicname = nic.split('/')[-2] + ncfg = netutil.get_nic_config(cfg, args.node, ifidx=idx) + if ncfg['ipv4_address'] or ncfg['ipv4_method'] == 'dhcp': + bootablev4nics.append(nicname) + if ncfg['ipv6_address']: + bootablev6nics.append(nicname) + if bootablev4nics: + print("{} appears to have network configuration suitable for IPv4 deployment via: {}".format(args.node, ",".join(bootablev4nics))) + elif bootablev6nics: + print('{} appears to have networking configuration suitable for IPv6 deployment via: {}'.format(args.node, ",".join(bootablev6nics))) + else: + emprint(f"{args.node} may not have any viable IP network configuration (check name resolution (DNS or hosts file) " + "and/or net.*ipv4_address, and verify that the deployment serer addresses and subnet mask/prefix length are accurate)") if not uuidok and not macok: allok = False - emprint(f'{args.node} does not have a uuid or mac address defined in id.uuid or net.*hwaddr, deployment will not work') + emprint(f'{args.node} does not have a uuid or mac address defined in id.uuid or net.*hwaddr, deployment will not work (Example resolution: nodeinventory {args.node} -s)') if allok: print(f'No issues detected with attributes of {args.node}') fprint("Checking name resolution: ") From a1ac234b73173c87a679d1f718cf4b70dd5115da Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 27 Oct 2023 15:31:14 -0400 Subject: [PATCH 100/126] Enhance error message for authentication issue during syncfiles --- confluent_server/confluent/sshutil.py | 16 +++++++++++++--- confluent_server/confluent/syncfiles.py | 2 ++ confluent_server/confluent/util.py | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/confluent_server/confluent/sshutil.py b/confluent_server/confluent/sshutil.py index 0a52fe81..d097ade1 100644 --- a/confluent_server/confluent/sshutil.py +++ b/confluent_server/confluent/sshutil.py @@ -129,11 
+129,21 @@ def prep_ssh_key(keyname): ap.write('#!/bin/sh\necho $CONFLUENT_SSH_PASSPHRASE\nrm {0}\n'.format(askpass)) os.chmod(askpass, 0o700) os.environ['CONFLUENT_SSH_PASSPHRASE'] = get_passphrase() + olddisplay = os.environ.get('DISPLAY', None) + oldaskpass = os.environ.get('SSH_ASKPASS', None) os.environ['DISPLAY'] = 'NONE' os.environ['SSH_ASKPASS'] = askpass - with open(os.devnull, 'wb') as devnull: - subprocess.check_output(['ssh-add', keyname], stdin=devnull, stderr=devnull) - del os.environ['CONFLUENT_SSH_PASSPHRASE'] + try: + with open(os.devnull, 'wb') as devnull: + subprocess.check_output(['ssh-add', keyname], stdin=devnull, stderr=devnull) + finally: + del os.environ['CONFLUENT_SSH_PASSPHRASE'] + del os.environ['DISPLAY'] + del os.environ['SSH_ASKPASS'] + if olddisplay: + os.environ['DISPLAY'] = olddisplay + if oldaskpass: + os.environ['SSH_ASKPASS'] = oldaskpass ready_keys[keyname] = 1 finally: adding_key = False diff --git a/confluent_server/confluent/syncfiles.py b/confluent_server/confluent/syncfiles.py index 556d9bcf..6c11d072 100644 --- a/confluent_server/confluent/syncfiles.py +++ b/confluent_server/confluent/syncfiles.py @@ -212,6 +212,8 @@ def sync_list_to_node(sl, node, suffixes, peerip=None): unreadablefiles.append(filename.replace(targdir, '')) if unreadablefiles: raise Exception("Syncing failed due to unreadable files: " + ','.join(unreadablefiles)) + elif b'Permission denied, please try again.' 
in e.stderr: + raise Exception('Syncing failed due to authentication error, is the confluent automation key not set up (osdeploy initialize -a) or is there some process replacing authorized_keys on the host?') else: raise finally: diff --git a/confluent_server/confluent/util.py b/confluent_server/confluent/util.py index 1509a827..8cf9bbc9 100644 --- a/confluent_server/confluent/util.py +++ b/confluent_server/confluent/util.py @@ -42,7 +42,7 @@ def run(cmd): stdout, stderr = process.communicate() retcode = process.poll() if retcode: - raise subprocess.CalledProcessError(retcode, process.args, output=stdout) + raise subprocess.CalledProcessError(retcode, process.args, output=stdout, stderr=stderr) return stdout, stderr From 814f4208529720989842aba900304941c9f09aa9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 31 Oct 2023 11:47:28 -0400 Subject: [PATCH 101/126] Update genesis to deal with CS9 sshd --- .../genesis/initramfs/opt/confluent/bin/rungenesis | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis b/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis index b7035fe0..ebf0a380 100644 --- a/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis +++ b/confluent_osdeploy/genesis/initramfs/opt/confluent/bin/rungenesis @@ -174,6 +174,8 @@ dnsdomain=${dnsdomain#dnsdomain: } echo search $dnsdomain >> /etc/resolv.conf echo -n "Initializing ssh..." ssh-keygen -A +mkdir -p /usr/share/empty.sshd +rm /etc/ssh/ssh_host_dsa_key* for pubkey in /etc/ssh/ssh_host*key.pub; do certfile=${pubkey/.pub/-cert.pub} privfile=${pubkey%.pub} From 8a4ef0b1fe237fae9c579194d553f3fdebddfcf6 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 1 Nov 2023 10:42:33 -0400 Subject: [PATCH 102/126] Make link type detection more specific If the ip command shows altnames, do not let the altnames interfere with locking on to linktype. Further, use show dev instead of grep to be more specific. 
--- .../usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh | 2 +- .../usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh b/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh index b2881e0b..65abf8f6 100644 --- a/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh +++ b/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh @@ -189,7 +189,7 @@ cat > /run/NetworkManager/system-connections/$ifname.nmconnection << EOC EOC echo id=${ifname} >> /run/NetworkManager/system-connections/$ifname.nmconnection echo uuid=$(uuidgen) >> /run/NetworkManager/system-connections/$ifname.nmconnection -linktype=$(ip link |grep -A2 ${ifname}|tail -n 1|awk '{print $1}') +linktype=$(ip link show dev ${ifname}|grep link/|awk '{print $1}') if [ "$linktype" = link/infiniband ]; then linktype="infiniband" else diff --git a/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh b/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh index 4fca92cf..a9eba388 100644 --- a/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh +++ b/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh @@ -154,7 +154,7 @@ cat > /run/NetworkManager/system-connections/$ifname.nmconnection << EOC EOC echo id=${ifname} >> /run/NetworkManager/system-connections/$ifname.nmconnection echo uuid=$(uuidgen) >> /run/NetworkManager/system-connections/$ifname.nmconnection -linktype=$(ip link |grep -A2 ${ifname}|tail -n 1|awk '{print $1}') +linktype=$(ip link show dev ${ifname}|grep link/|awk '{print $1}') if [ "$linktype" = link/infiniband ]; then linktype="infiniband" else From 
8f927d94e9b9f29d62c3228e43cd2533d8ae467e Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 1 Nov 2023 11:17:04 -0400 Subject: [PATCH 103/126] Switch to bond from team Teaming is deprecated and EL went back to bond, follow that guidance. --- confluent_osdeploy/common/profile/scripts/confignet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index 7e641205..4e9fe9b3 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -344,7 +344,7 @@ class NetworkManager(object): bondcfg[stg] = deats[stg] if member in self.uuidbyname: subprocess.check_call(['nmcli', 'c', 'del', self.uuidbyname[member]]) - subprocess.check_call(['nmcli', 'c', 'add', 'type', 'team-slave', 'master', team, 'con-name', member, 'connection.interface-name', member]) + subprocess.check_call(['nmcli', 'c', 'add', 'type', 'bond-slave', 'master', team, 'con-name', member, 'connection.interface-name', member]) if bondcfg: args = [] for parm in bondcfg: @@ -378,7 +378,7 @@ class NetworkManager(object): for arg in cmdargs: cargs.append(arg) cargs.append(cmdargs[arg]) - subprocess.check_call(['nmcli', 'c', 'add', 'type', 'team', 'con-name', cname, 'connection.interface-name', cname, 'team.runner', stgs['team_mode']] + cargs) + subprocess.check_call(['nmcli', 'c', 'add', 'type', 'bond', 'con-name', cname, 'connection.interface-name', cname, 'bond.options', 'mode={}'.format(stgs['team_mode'])] + cargs) for iface in cfg['interfaces']: self.add_team_member(cname, iface) subprocess.check_call(['nmcli', 'c', 'u', cname]) From e90f2829abade00ee0cd84bb780d7fac912ed383 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 8 Nov 2023 09:37:44 -0500 Subject: [PATCH 104/126] Filter bind mounts from imgutil capture If bind mounts are in use, it will foul the capture. 
Notably, one example is if you install the firefox snap in ubuntu, snapd creates a bind mount. This will ignore bind mounts, and rely upon the system to put it straight. --- imgutil/imgutil | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/imgutil/imgutil b/imgutil/imgutil index de3a9025..959c4a17 100644 --- a/imgutil/imgutil +++ b/imgutil/imgutil @@ -61,13 +61,27 @@ FALLOC_FL_PUNCH_HOLE = 2 numregex = re.compile('([0-9]+)') def get_partition_info(): + with open('/proc/self/mountinfo') as procinfo: + mountinfo = procinfo.read() + capmounts = set([]) + for entry in mountinfo.split('\n'): + if not entry: + continue + firstinf, lastinf = entry.split(' - ') + root, mount = firstinf.split()[3:5] + filesystem = lastinf.split()[0] + if root != '/': + continue + if filesystem not in ('ext3', 'ext4', 'xfs', 'btrfs', 'vfat'): + continue + capmounts.add(mount) with open('/proc/mounts') as procmounts: mountinfo = procmounts.read() for entry in mountinfo.split('\n'): if not entry: continue dev, mount, fs, flags = entry.split()[:4] - if fs not in ('ext3', 'ext4', 'xfs', 'btrfs', 'vfat'): + if mount not in capmounts: continue fsinfo = os.statvfs(mount) partinfo = { From 2cd75ef4252f2ceb5b70ef38e9be19f60f3602db Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 8 Nov 2023 10:22:51 -0500 Subject: [PATCH 105/126] Fix diskless autocons message --- .../ubuntu20.04-diskless/initramfs/conf/conf.d/confluent | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_osdeploy/ubuntu20.04-diskless/initramfs/conf/conf.d/confluent b/confluent_osdeploy/ubuntu20.04-diskless/initramfs/conf/conf.d/confluent index 64a3713d..79787074 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/initramfs/conf/conf.d/confluent +++ b/confluent_osdeploy/ubuntu20.04-diskless/initramfs/conf/conf.d/confluent @@ -1,4 +1,5 @@ if ! 
grep console= /proc/cmdline > /dev/null; then + mkdir -p /custom-installation /opt/confluent/bin/autocons > /custom-installation/autocons.info cons=$(cat /custom-installation/autocons.info) if [ ! -z "$cons" ]; then From e03f010eac0f1afaf372453ed8bb1d2caaefb6a0 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 9 Nov 2023 09:03:59 -0500 Subject: [PATCH 106/126] Fix Ubuntu confignet without IPv6 If confignet did not have ipv6 to work with, it would fail to work at all. Also handle when the configuration has a blank DNS server in it. --- confluent_osdeploy/common/profile/scripts/confignet | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index 4e9fe9b3..eaaf2621 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -151,13 +151,14 @@ class NetplanManager(object): needcfgapply = False for devname in devnames: needcfgwrite = False - if stgs['ipv6_method'] == 'static': - curraddr = stgs['ipv6_address'] + # ipv6_method missing at uconn... 
+ if stgs.get('ipv6_method', None) == 'static': + curraddr = stgs'ipv6_address'] currips = self.getcfgarrpath([devname, 'addresses']) if curraddr not in currips: needcfgwrite = True currips.append(curraddr) - if stgs['ipv4_method'] == 'static': + if stgs.get('ipv4_method', None) == 'static': curraddr = stgs['ipv4_address'] currips = self.getcfgarrpath([devname, 'addresses']) if curraddr not in currips: @@ -180,7 +181,7 @@ class NetplanManager(object): if dnsips: currdnsips = self.getcfgarrpath([devname, 'nameservers', 'addresses']) for dnsip in dnsips: - if dnsip not in currdnsips: + if dnsip and dnsip not in currdnsips: needcfgwrite = True currdnsips.append(dnsip) if dnsdomain: From 6e092934e7ec4222a175e5bc473cb3a7ae8d4db8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 9 Nov 2023 17:15:17 -0500 Subject: [PATCH 107/126] Fix for ubuntu clone to nvme --- .../ubuntu20.04-diskless/profiles/default/scripts/image2disk.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py index 5d15e3d4..1d19ebad 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/image2disk.py @@ -206,6 +206,8 @@ def fixup(rootdir, vols): partnum = re.search('(\d+)$', targdev).group(1) targblock = re.search('(.*)\d+$', targdev).group(1) if targblock: + if targblock.endswith('p') and 'nvme' in targblock: + targblock = targblock[:-1] shimpath = subprocess.check_output(['find', os.path.join(rootdir, 'boot/efi'), '-name', 'shimx64.efi']).decode('utf8').strip() shimpath = shimpath.replace(rootdir, '/').replace('/boot/efi', '').replace('//', '/').replace('/', '\\') subprocess.check_call(['efibootmgr', '-c', '-d', targblock, '-l', shimpath, '--part', partnum]) From ec023831a5d322afb86ca73938a7a79c1fa54ddb Mon Sep 17 00:00:00 2001 
From: Jarrod Johnson Date: Thu, 9 Nov 2023 17:28:38 -0500 Subject: [PATCH 108/126] Fix syntax error in confignet --- confluent_osdeploy/common/profile/scripts/confignet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index eaaf2621..cb2569ce 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -153,7 +153,7 @@ class NetplanManager(object): needcfgwrite = False # ipv6_method missing at uconn... if stgs.get('ipv6_method', None) == 'static': - curraddr = stgs'ipv6_address'] + curraddr = stgs['ipv6_address'] currips = self.getcfgarrpath([devname, 'addresses']) if curraddr not in currips: needcfgwrite = True From f475d589559627f0222022b003dd03496028ea88 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 13 Nov 2023 15:43:11 -0500 Subject: [PATCH 109/126] Various permission fixes for osdeploy initialize Fix a few scenarios where certain ordering of initialize creates unworkable permissions. 
--- confluent_server/bin/osdeploy | 28 ++++++++++++++++--- confluent_server/confluent/certutil.py | 38 ++++++++++++++------------ confluent_server/confluent/sshutil.py | 13 +++++---- 3 files changed, 51 insertions(+), 28 deletions(-) diff --git a/confluent_server/bin/osdeploy b/confluent_server/bin/osdeploy index ed39e78c..ef6859e3 100644 --- a/confluent_server/bin/osdeploy +++ b/confluent_server/bin/osdeploy @@ -373,9 +373,14 @@ def initialize(cmdset): for rsp in c.read('/uuid'): uuid = rsp.get('uuid', {}).get('value', None) if uuid: - with open('confluent_uuid', 'w') as uuidout: - uuidout.write(uuid) - uuidout.write('\n') + oum = os.umask(0o11) + try: + with open('confluent_uuid', 'w') as uuidout: + uuidout.write(uuid) + uuidout.write('\n') + os.chmod('confluent_uuid', 0o644) + finally: + os.umask(oum) totar.append('confluent_uuid') topack.append('confluent_uuid') if os.path.exists('ssh'): @@ -403,7 +408,17 @@ def initialize(cmdset): if res: sys.stderr.write('Error occurred while packing site initramfs') sys.exit(1) - os.rename(tmpname, '/var/lib/confluent/public/site/initramfs.cpio') + oum = os.umask(0o22) + try: + os.rename(tmpname, '/var/lib/confluent/public/site/initramfs.cpio') + os.chown('/var/lib/confluent/public/site/initramfs.cpio', 0o644) + finally: + os.umask(oum) + oum = os.umask(0o22) + try: + os.chown('/var/lib/confluent/public/site/initramfs.cpio', 0o644) + finally: + os.umask(oum) if cmdset.g: updateboot('genesis-x86_64') if totar: @@ -411,6 +426,11 @@ def initialize(cmdset): tarcmd = ['tar', '-czf', tmptarname] + totar subprocess.check_call(tarcmd) os.rename(tmptarname, '/var/lib/confluent/public/site/initramfs.tgz') + oum = os.umask(0o22) + try: + os.chown('/var/lib/confluent/public/site/initramfs.tgz', 0o644) + finally: + os.umask(0o22) os.chdir(opath) print('Site initramfs content packed successfully') diff --git a/confluent_server/confluent/certutil.py b/confluent_server/confluent/certutil.py index dffaf85e..2e788bad 100644 --- 
a/confluent_server/confluent/certutil.py +++ b/confluent_server/confluent/certutil.py @@ -95,27 +95,29 @@ def assure_tls_ca(): os.makedirs(os.path.dirname(fname)) except OSError as e: if e.errno != 17: + os.seteuid(ouid) raise + try: + shutil.copy2('/etc/confluent/tls/cacert.pem', fname) + hv, _ = util.run( + ['openssl', 'x509', '-in', '/etc/confluent/tls/cacert.pem', '-hash', '-noout']) + if not isinstance(hv, str): + hv = hv.decode('utf8') + hv = hv.strip() + hashname = '/var/lib/confluent/public/site/tls/{0}.0'.format(hv) + certname = '{0}.pem'.format(collective.get_myname()) + for currname in os.listdir('/var/lib/confluent/public/site/tls/'): + currname = os.path.join('/var/lib/confluent/public/site/tls/', currname) + if currname.endswith('.0'): + try: + realname = os.readlink(currname) + if realname == certname: + os.unlink(currname) + except OSError: + pass + os.symlink(certname, hashname) finally: os.seteuid(ouid) - shutil.copy2('/etc/confluent/tls/cacert.pem', fname) - hv, _ = util.run( - ['openssl', 'x509', '-in', '/etc/confluent/tls/cacert.pem', '-hash', '-noout']) - if not isinstance(hv, str): - hv = hv.decode('utf8') - hv = hv.strip() - hashname = '/var/lib/confluent/public/site/tls/{0}.0'.format(hv) - certname = '{0}.pem'.format(collective.get_myname()) - for currname in os.listdir('/var/lib/confluent/public/site/tls/'): - currname = os.path.join('/var/lib/confluent/public/site/tls/', currname) - if currname.endswith('.0'): - try: - realname = os.readlink(currname) - if realname == certname: - os.unlink(currname) - except OSError: - pass - os.symlink(certname, hashname) def substitute_cfg(setting, key, val, newval, cfgfile, line): if key.strip() == setting: diff --git a/confluent_server/confluent/sshutil.py b/confluent_server/confluent/sshutil.py index d097ade1..16e4db7e 100644 --- a/confluent_server/confluent/sshutil.py +++ b/confluent_server/confluent/sshutil.py @@ -98,14 +98,15 @@ def initialize_ca(): preexec_fn=normalize_uid) ouid = normalize_uid() 
try: - os.makedirs('/var/lib/confluent/public/site/ssh/', mode=0o755) - except OSError as e: - if e.errno != 17: - raise + try: + os.makedirs('/var/lib/confluent/public/site/ssh/', mode=0o755) + except OSError as e: + if e.errno != 17: + raise + cafilename = '/var/lib/confluent/public/site/ssh/{0}.ca'.format(myname) + shutil.copy('/etc/confluent/ssh/ca.pub', cafilename) finally: os.seteuid(ouid) - cafilename = '/var/lib/confluent/public/site/ssh/{0}.ca'.format(myname) - shutil.copy('/etc/confluent/ssh/ca.pub', cafilename) # newent = '@cert-authority * ' + capub.read() From cd07e0e212a8c526074a24ce0487e100e7dc1221 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 14 Nov 2023 15:14:54 -0500 Subject: [PATCH 110/126] Add missing disclaimer from tmt license --- confluent_vtbufferd/NOTICE | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/confluent_vtbufferd/NOTICE b/confluent_vtbufferd/NOTICE index 95b86a82..da174e81 100644 --- a/confluent_vtbufferd/NOTICE +++ b/confluent_vtbufferd/NOTICE @@ -22,3 +22,16 @@ modification, are permitted provided that the following conditions are met: * Neither the name of the copyright holder nor the names of contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS, +* COPYRIGHT HOLDERS, OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + From c9452e65e8f35916adab7cb7257ca02e537beda5 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 15 Nov 2023 11:30:20 -0500 Subject: [PATCH 111/126] Fix some osdeploy ordering issues osdeploy initialization dependencies have been improved and marked if absolutely dependent. --- confluent_server/bin/osdeploy | 46 +++++++++++++++++---------- confluent_server/confluent/sshutil.py | 8 +++++ confluent_server/confluent/util.py | 4 +-- 3 files changed, 39 insertions(+), 19 deletions(-) diff --git a/confluent_server/bin/osdeploy b/confluent_server/bin/osdeploy index ef6859e3..fff220be 100644 --- a/confluent_server/bin/osdeploy +++ b/confluent_server/bin/osdeploy @@ -72,6 +72,12 @@ def main(args): return rebase(cmdset.profile) ap.print_help() +def symlinkp(src, trg): + try: + os.symlink(src, trg) + except Exception as e: + if e.errno != 17: + raise def initialize_genesis(): if not os.path.exists('/opt/confluent/genesis/x86_64/boot/kernel'): @@ -89,30 +95,33 @@ def initialize_genesis(): return retval[1] retcode = 0 try: + util.mkdirp('/var/lib/confluent', 0o755) if hasconfluentuser: + os.chown('/var/lib/confluent', hasconfluentuser.pw_uid, -1) os.setgid(hasconfluentuser.pw_gid) os.setuid(hasconfluentuser.pw_uid) os.umask(0o22) - os.makedirs('/var/lib/confluent/public/os/genesis-x86_64/boot/efi/boot', 0o755) - os.makedirs('/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs', 0o755) - 
os.symlink('/opt/confluent/genesis/x86_64/boot/efi/boot/BOOTX64.EFI', + util.mkdirp('/var/lib/confluent/public/os/genesis-x86_64/boot/efi/boot', 0o755) + util.mkdirp('/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs', 0o755) + symlinkp('/opt/confluent/genesis/x86_64/boot/efi/boot/BOOTX64.EFI', '/var/lib/confluent/public/os/genesis-x86_64/boot/efi/boot/BOOTX64.EFI') - os.symlink('/opt/confluent/genesis/x86_64/boot/efi/boot/grubx64.efi', + symlinkp('/opt/confluent/genesis/x86_64/boot/efi/boot/grubx64.efi', '/var/lib/confluent/public/os/genesis-x86_64/boot/efi/boot/grubx64.efi') - os.symlink('/opt/confluent/genesis/x86_64/boot/initramfs/distribution', + symlinkp('/opt/confluent/genesis/x86_64/boot/initramfs/distribution', '/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs/distribution') - os.symlink('/var/lib/confluent/public/site/initramfs.cpio', + symlinkp('/var/lib/confluent/public/site/initramfs.cpio', '/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs/site.cpio') - os.symlink('/opt/confluent/lib/osdeploy/genesis/initramfs/addons.cpio', + symlinkp('/opt/confluent/lib/osdeploy/genesis/initramfs/addons.cpio', '/var/lib/confluent/public/os/genesis-x86_64/boot/initramfs/addons.cpio') - os.symlink('/opt/confluent/genesis/x86_64/boot/kernel', + symlinkp('/opt/confluent/genesis/x86_64/boot/kernel', '/var/lib/confluent/public/os/genesis-x86_64/boot/kernel') - shutil.copytree('/opt/confluent/lib/osdeploy/genesis/profiles/default/ansible/', - '/var/lib/confluent/public/os/genesis-x86_64/ansible/') - shutil.copytree('/opt/confluent/lib/osdeploy/genesis/profiles/default/scripts/', - '/var/lib/confluent/public/os/genesis-x86_64/scripts/') - shutil.copyfile('/opt/confluent/lib/osdeploy/genesis/profiles/default/profile.yaml', - '/var/lib/confluent/public/os/genesis-x86_64/profile.yaml') + if not os.path.exists('/var/lib/confluent/public/os/genesis-x86_64/ansible/'): + shutil.copytree('/opt/confluent/lib/osdeploy/genesis/profiles/default/ansible/', + 
'/var/lib/confluent/public/os/genesis-x86_64/ansible/') + shutil.copytree('/opt/confluent/lib/osdeploy/genesis/profiles/default/scripts/', + '/var/lib/confluent/public/os/genesis-x86_64/scripts/') + shutil.copyfile('/opt/confluent/lib/osdeploy/genesis/profiles/default/profile.yaml', + '/var/lib/confluent/public/os/genesis-x86_64/profile.yaml') except Exception as e: sys.stderr.write(str(e) + '\n') retcode = 1 @@ -411,12 +420,12 @@ def initialize(cmdset): oum = os.umask(0o22) try: os.rename(tmpname, '/var/lib/confluent/public/site/initramfs.cpio') - os.chown('/var/lib/confluent/public/site/initramfs.cpio', 0o644) + os.chmod('/var/lib/confluent/public/site/initramfs.cpio', 0o644) finally: os.umask(oum) oum = os.umask(0o22) try: - os.chown('/var/lib/confluent/public/site/initramfs.cpio', 0o644) + os.chmod('/var/lib/confluent/public/site/initramfs.cpio', 0o644) finally: os.umask(oum) if cmdset.g: @@ -428,7 +437,7 @@ def initialize(cmdset): os.rename(tmptarname, '/var/lib/confluent/public/site/initramfs.tgz') oum = os.umask(0o22) try: - os.chown('/var/lib/confluent/public/site/initramfs.tgz', 0o644) + os.chmod('/var/lib/confluent/public/site/initramfs.tgz', 0o644) finally: os.umask(0o22) os.chdir(opath) @@ -441,6 +450,9 @@ def initialize(cmdset): def updateboot(profilename): + if not os.path.exists('/var/lib/confluent/public/site/initramfs.cpio'): + emprint('Must generate site content first (TLS (-t) and/or SSH (-s))') + return 1 c = client.Command() for rsp in c.update('/deployment/profiles/{0}'.format(profilename), {'updateboot': 1}): diff --git a/confluent_server/confluent/sshutil.py b/confluent_server/confluent/sshutil.py index 16e4db7e..cf17f37a 100644 --- a/confluent_server/confluent/sshutil.py +++ b/confluent_server/confluent/sshutil.py @@ -186,6 +186,14 @@ def initialize_root_key(generate, automation=False): if os.path.exists('/etc/confluent/ssh/automation'): alreadyexist = True else: + ouid = normalize_uid() + try: + os.makedirs('/etc/confluent/ssh', 
mode=0o700) + except OSError as e: + if e.errno != 17: + raise + finally: + os.seteuid(ouid) subprocess.check_call( ['ssh-keygen', '-t', 'ed25519', '-f','/etc/confluent/ssh/automation', '-N', get_passphrase(), diff --git a/confluent_server/confluent/util.py b/confluent_server/confluent/util.py index 8cf9bbc9..96d2291b 100644 --- a/confluent_server/confluent/util.py +++ b/confluent_server/confluent/util.py @@ -29,9 +29,9 @@ import struct import eventlet.green.subprocess as subprocess -def mkdirp(path): +def mkdirp(path, mode=0o777): try: - os.makedirs(path) + os.makedirs(path, mode) except OSError as e: if e.errno != 17: raise From 9757cd1ae32e343f6eddcd47067f2440a1c070d4 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 16 Nov 2023 10:17:55 -0500 Subject: [PATCH 112/126] Check the profile *before* rebooting systems This provides a much better experience when a typo or other mistake has a profile that is not actionable. --- confluent_client/bin/nodedeploy | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/confluent_client/bin/nodedeploy b/confluent_client/bin/nodedeploy index 2417f2c5..52e3a7d9 100755 --- a/confluent_client/bin/nodedeploy +++ b/confluent_client/bin/nodedeploy @@ -90,17 +90,6 @@ def main(args): if 'error' in rsp: sys.stderr.write(rsp['error'] + '\n') sys.exit(1) - if not args.clear and args.network and not args.prepareonly: - rc = c.simple_noderange_command(args.noderange, '/boot/nextdevice', 'network', - bootmode='uefi', - persistent=False, - errnodes=errnodes) - if errnodes: - sys.stderr.write( - 'Unable to set boot device for following nodes: {0}\n'.format( - ','.join(errnodes))) - return 1 - rc |= c.simple_noderange_command(args.noderange, '/power/state', 'boot') if args.clear: cleararm(args.noderange, c) clearpending(args.noderange, c) @@ -120,7 +109,7 @@ def main(args): for profname in profnames: sys.stderr.write(' ' + profname + '\n') else: - sys.stderr.write('No deployment profiles available, 
try osdeploy fiimport or imgutil capture\n') + sys.stderr.write('No deployment profiles available, try osdeploy import or imgutil capture\n') sys.exit(1) armonce(args.noderange, c) setpending(args.noderange, args.profile, c) @@ -166,6 +155,17 @@ def main(args): else: print('{0}: {1}{2}'.format(node, profile, armed)) sys.exit(0) + if not args.clear and args.network and not args.prepareonly: + rc = c.simple_noderange_command(args.noderange, '/boot/nextdevice', 'network', + bootmode='uefi', + persistent=False, + errnodes=errnodes) + if errnodes: + sys.stderr.write( + 'Unable to set boot device for following nodes: {0}\n'.format( + ','.join(errnodes))) + return 1 + rc |= c.simple_noderange_command(args.noderange, '/power/state', 'boot') if args.network and not args.prepareonly: return rc return 0 From 68ce3d039d7b26125222d2bd51e5d911b0ea14fe Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 27 Nov 2023 08:34:34 -0500 Subject: [PATCH 113/126] Filter out nvme 'c' devnames, that are used to refer to paths to nvme Some versions start manifesting nvme devnames with 'c', which are to be used to interact with multipath to have raw devices backing a traditional nvme device. 
--- .../el7-diskless/profiles/default/scripts/getinstalldisk | 2 ++ confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk | 2 ++ .../el8-diskless/profiles/default/scripts/getinstalldisk | 2 ++ confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk | 2 ++ .../el9-diskless/profiles/default/scripts/getinstalldisk | 2 ++ .../rhvh4/profiles/default/scripts/getinstalldisk | 2 ++ confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk | 2 ++ .../suse15/profiles/server/scripts/getinstalldisk | 2 ++ .../profiles/default/scripts/getinstalldisk | 2 ++ .../ubuntu20.04/profiles/default/scripts/getinstalldisk | 2 ++ .../ubuntu22.04/profiles/default/scripts/getinstalldisk | 2 ++ 11 files changed, 22 insertions(+) diff --git a/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el7-diskless/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el7/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk 
b/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el8/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/rhvh4/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def 
__init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk b/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk +++ b/confluent_osdeploy/suse15/profiles/hpc/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk b/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk +++ b/confluent_osdeploy/suse15/profiles/server/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/ubuntu20.04-diskless/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk 
b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/ubuntu20.04/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None diff --git a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk index 522aba00..04c7708e 100644 --- a/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk +++ b/confluent_osdeploy/ubuntu22.04/profiles/default/scripts/getinstalldisk @@ -3,6 +3,8 @@ import os class DiskInfo(object): def __init__(self, devname): + if devname.startswith('nvme') and 'c' in devname: + raise Exception("Skipping multipath devname") self.name = devname self.wwn = None self.path = None From 0b28d64c83f439412fcbf4c02b6aae84657b52c3 Mon Sep 17 00:00:00 2001 From: Christian Goll Date: Mon, 27 Nov 2023 15:00:27 +0100 Subject: [PATCH 114/126] python3-dbm is required for SUSE the python module `anydbm` is part of this python package Signed-off-by: Christian Goll --- confluent_server/confluent_server.spec.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_server/confluent_server.spec.tmpl b/confluent_server/confluent_server.spec.tmpl index c7e2aa3a..51046a8f 100644 --- a/confluent_server/confluent_server.spec.tmpl +++ b/confluent_server/confluent_server.spec.tmpl @@ -22,7 +22,7 @@ Requires: python3-pyghmi >= 1.0.34, python3-eventlet, python3-greenlet, python3- %if "%{dist}" == ".el9" Requires: python3-pyghmi >= 1.0.34, python3-eventlet, python3-greenlet, python3-pycryptodomex >= 3.4.7, confluent_client == %{version}, python3-pyparsing, python3-paramiko, python3-dns, 
python3-netifaces, python3-pyasn1 >= 0.2.3, python3-pysnmp >= 4.3.4, python3-lxml, python3-eficompressor, python3-setuptools, python3-dateutil, python3-cffi, python3-pyOpenSSL, python3-websocket-client python3-msgpack python3-libarchive-c python3-yaml openssl iproute %else -Requires: python3-pyghmi >= 1.0.34, python3-eventlet, python3-greenlet, python3-pycryptodome >= 3.4.7, confluent_client == %{version}, python3-pyparsing, python3-paramiko, python3-dnspython, python3-netifaces, python3-pyasn1 >= 0.2.3, python3-pysnmp >= 4.3.4, python3-lxml, python3-eficompressor, python3-setuptools, python3-dateutil, python3-cffi, python3-pyOpenSSL, python3-websocket-client python3-msgpack python3-libarchive-c python3-PyYAML openssl iproute +Requires: python3-dbm,python3-pyghmi >= 1.0.34, python3-eventlet, python3-greenlet, python3-pycryptodome >= 3.4.7, confluent_client == %{version}, python3-pyparsing, python3-paramiko, python3-dnspython, python3-netifaces, python3-pyasn1 >= 0.2.3, python3-pysnmp >= 4.3.4, python3-lxml, python3-eficompressor, python3-setuptools, python3-dateutil, python3-cffi, python3-pyOpenSSL, python3-websocket-client python3-msgpack python3-libarchive-c python3-PyYAML openssl iproute %endif %endif %endif From 3730ba049f97b4d007b538b058055fd93e0aa8a4 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 28 Nov 2023 13:11:25 -0500 Subject: [PATCH 115/126] Fix potential doubling up of IPv6 brackets There were scenarios where IPv6 URL brackets may double up. 
--- .../usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh | 4 ++-- .../usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh | 4 ++-- .../lib/dracut/hooks/cmdline/10-confluentdiskless.sh | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh b/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh index 65abf8f6..cdcc12fd 100644 --- a/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh +++ b/confluent_osdeploy/el8-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh @@ -155,7 +155,7 @@ fi ready=0 while [ $ready = "0" ]; do get_remote_apikey - if [[ $confluent_mgr == *:* ]]; then + if [[ $confluent_mgr == *:* ]] && [[ $confluent_mgr != "["* ]]; then confluent_mgr="[$confluent_mgr]" fi tmperr=$(mktemp) @@ -324,7 +324,7 @@ fi echo '[proxy]' >> /run/NetworkManager/system-connections/$ifname.nmconnection chmod 600 /run/NetworkManager/system-connections/*.nmconnection confluent_websrv=$confluent_mgr -if [[ $confluent_websrv == *:* ]]; then +if [[ $confluent_websrv == *:* ]] && [[ $confluent_websrv != "["* ]]; then confluent_websrv="[$confluent_websrv]" fi echo -n "Initializing ssh..." 
diff --git a/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh b/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh index a9eba388..a4f10ee2 100644 --- a/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh +++ b/confluent_osdeploy/el9-diskless/initramfs/usr/lib/dracut/hooks/cmdline/10-confluentdiskless.sh @@ -120,7 +120,7 @@ fi ready=0 while [ $ready = "0" ]; do get_remote_apikey - if [[ $confluent_mgr == *:* ]]; then + if [[ $confluent_mgr == *:* ]] && [[ $confluent_mgr != "["* ]]; then confluent_mgr="[$confluent_mgr]" fi tmperr=$(mktemp) @@ -281,7 +281,7 @@ fi echo '[proxy]' >> /run/NetworkManager/system-connections/$ifname.nmconnection chmod 600 /run/NetworkManager/system-connections/*.nmconnection confluent_websrv=$confluent_mgr -if [[ $confluent_websrv == *:* ]]; then +if [[ $confluent_websrv == *:* ]] && [[ $confluent_websrv != "["* ]]; then confluent_websrv="[$confluent_websrv]" fi echo -n "Initializing ssh..." 
diff --git a/confluent_osdeploy/suse15-diskless/initramfs/lib/dracut/hooks/cmdline/10-confluentdiskless.sh b/confluent_osdeploy/suse15-diskless/initramfs/lib/dracut/hooks/cmdline/10-confluentdiskless.sh index 146c4797..5586978c 100644 --- a/confluent_osdeploy/suse15-diskless/initramfs/lib/dracut/hooks/cmdline/10-confluentdiskless.sh +++ b/confluent_osdeploy/suse15-diskless/initramfs/lib/dracut/hooks/cmdline/10-confluentdiskless.sh @@ -116,7 +116,7 @@ fi ready=0 while [ $ready = "0" ]; do get_remote_apikey - if [[ $confluent_mgr == *:* ]]; then + if [[ $confluent_mgr == *:* ]] && [[ $confluent_mgr != "["* ]]; then confluent_mgr="[$confluent_mgr]" fi tmperr=$(mktemp) From 55e60d52fd0c693b831df0983938d85f4e0b6a33 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 28 Nov 2023 13:33:21 -0500 Subject: [PATCH 116/126] Avoid potential multiple brackets in imageboot.sh --- .../el8-diskless/profiles/default/scripts/imageboot.sh | 2 +- .../el9-diskless/profiles/default/scripts/imageboot.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh index ee2a8125..fe53bf38 100644 --- a/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el8-diskless/profiles/default/scripts/imageboot.sh @@ -1,6 +1,6 @@ . 
/lib/dracut-lib.sh confluent_whost=$confluent_mgr -if [[ "$confluent_whost" == *:* ]]; then +if [[ "$confluent_whost" == *:* ]] && [[ "$confluent_whost" != "["* ]]; then confluent_whost="[$confluent_mgr]" fi mkdir -p /mnt/remoteimg /mnt/remote /mnt/overlay diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh index ee2a8125..fe53bf38 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/imageboot.sh @@ -1,6 +1,6 @@ . /lib/dracut-lib.sh confluent_whost=$confluent_mgr -if [[ "$confluent_whost" == *:* ]]; then +if [[ "$confluent_whost" == *:* ]] && [[ "$confluent_whost" != "["* ]]; then confluent_whost="[$confluent_mgr]" fi mkdir -p /mnt/remoteimg /mnt/remote /mnt/overlay From 63b737dc52551b43e2f000d53b5b6d940cb3264d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 28 Nov 2023 14:09:59 -0500 Subject: [PATCH 117/126] Correct bonding in confignet for NetworkManager --- confluent_osdeploy/common/profile/scripts/confignet | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index cb2569ce..f19e620c 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -379,6 +379,8 @@ class NetworkManager(object): for arg in cmdargs: cargs.append(arg) cargs.append(cmdargs[arg]) + if stgs['team_mode'] == 'lacp': + stgs['team_mode'] = '802.3ad' subprocess.check_call(['nmcli', 'c', 'add', 'type', 'bond', 'con-name', cname, 'connection.interface-name', cname, 'bond.options', 'mode={}'.format(stgs['team_mode'])] + cargs) for iface in cfg['interfaces']: self.add_team_member(cname, iface) From 6763c863879368371b6c134f549e52c6dafff998 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 28 Nov 2023 14:35:01 -0500 Subject: 
[PATCH 118/126] Add DNS to NetworkManager Similar to netplan, apply DNS to every interface. --- .../common/profile/scripts/confignet | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index f19e620c..44eb32ed 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -295,7 +295,8 @@ class WickedManager(object): class NetworkManager(object): - def __init__(self, devtypes): + def __init__(self, devtypes, deploycfg): + self.deploycfg = deploycfg self.connections = {} self.uuidbyname = {} self.uuidbydev = {} @@ -367,6 +368,20 @@ class NetworkManager(object): cmdargs['ipv4.gateway'] = stgs['ipv4_gateway'] if stgs.get('ipv6_gateway', None): cmdargs['ipv6.gateway'] = stgs['ipv6_gateway'] + dnsips = self.deploycfg.get('nameservers', []) + if not dnsips: + dnsips = [] + dns4 = [] + dns6 = [] + for dnsip in dnsips: + if '.' in dnsip: + dns4.append(dnsip) + elif ':' in dnsip: + dns6.append(dnsip) + if dns4: + cmdargs['ipv4.dns'] = dns4.join(',') + if dns6: + cmdargs['ipv6.dns'] = dns6.join(',') if len(cfg['interfaces']) > 1: # team time.. should be.. 
if not cfg['settings'].get('team_mode', None): sys.stderr.write("Warning, multiple interfaces ({0}) without a team_mode, skipping setup\n".format(','.join(cfg['interfaces']))) @@ -487,7 +502,7 @@ if __name__ == '__main__': if os.path.exists('/usr/sbin/netplan'): nm = NetplanManager(dc) if os.path.exists('/usr/bin/nmcli'): - nm = NetworkManager(devtypes) + nm = NetworkManager(devtypes, dc) elif os.path.exists('/usr/sbin/wicked'): nm = WickedManager() for netn in netname_to_interfaces: From 3aa91b61e5befc32e0ebe851b986c84d34634053 Mon Sep 17 00:00:00 2001 From: Christian Goll Date: Fri, 1 Dec 2023 10:57:31 +0100 Subject: [PATCH 119/126] disable online repos for openSUSE leap online repositories may not be accesible for the cluster nodes but were added from the content.xml. Editing this files with initprofile.sh is impossible as they are executed in parallel, so all repos starting with https?://download.opensuse.org are removed during post Signed-off-by: Christian Goll --- confluent_osdeploy/suse15/profiles/hpc/autoyast.leap | 6 ++++++ confluent_osdeploy/suse15/profiles/hpc/initprofile.sh | 3 +++ .../profiles/hpc/scripts/post.d/10-remove-online-repos.sh | 3 +++ confluent_osdeploy/suse15/profiles/server/autoyast.leap | 6 ++++++ confluent_osdeploy/suse15/profiles/server/initprofile.sh | 3 +++ .../server/scripts/post.d/10-remove-online-repos.sh | 3 +++ 6 files changed, 24 insertions(+) create mode 100644 confluent_osdeploy/suse15/profiles/hpc/scripts/post.d/10-remove-online-repos.sh create mode 100644 confluent_osdeploy/suse15/profiles/server/scripts/post.d/10-remove-online-repos.sh diff --git a/confluent_osdeploy/suse15/profiles/hpc/autoyast.leap b/confluent_osdeploy/suse15/profiles/hpc/autoyast.leap index 7f9d08f7..e92ec9fd 100644 --- a/confluent_osdeploy/suse15/profiles/hpc/autoyast.leap +++ b/confluent_osdeploy/suse15/profiles/hpc/autoyast.leap @@ -10,6 +10,12 @@ dynamic behavior and replace with static configuration. 
UTC %%TIMEZONE%% + + false + + + false + false diff --git a/confluent_osdeploy/suse15/profiles/hpc/initprofile.sh b/confluent_osdeploy/suse15/profiles/hpc/initprofile.sh index 9c6c295e..62a2663e 100644 --- a/confluent_osdeploy/suse15/profiles/hpc/initprofile.sh +++ b/confluent_osdeploy/suse15/profiles/hpc/initprofile.sh @@ -1,4 +1,7 @@ #!/bin/sh +# WARNING +# be careful when editing files here as this script is called +# in parallel to other copy operations, so changes to files can be lost discnum=$(basename $1) if [ "$discnum" != 1 ]; then exit 0; fi if [ -e $2/boot/kernel ]; then exit 0; fi diff --git a/confluent_osdeploy/suse15/profiles/hpc/scripts/post.d/10-remove-online-repos.sh b/confluent_osdeploy/suse15/profiles/hpc/scripts/post.d/10-remove-online-repos.sh new file mode 100644 index 00000000..9ae8224e --- /dev/null +++ b/confluent_osdeploy/suse15/profiles/hpc/scripts/post.d/10-remove-online-repos.sh @@ -0,0 +1,3 @@ +#!/usr/bin/bash +# remove online repos +grep -lE "baseurl=https?://download.opensuse.org" /etc/zypp/repos.d/*repo | xargs rm -- diff --git a/confluent_osdeploy/suse15/profiles/server/autoyast.leap b/confluent_osdeploy/suse15/profiles/server/autoyast.leap index 7f9d08f7..e92ec9fd 100644 --- a/confluent_osdeploy/suse15/profiles/server/autoyast.leap +++ b/confluent_osdeploy/suse15/profiles/server/autoyast.leap @@ -10,6 +10,12 @@ dynamic behavior and replace with static configuration. 
UTC %%TIMEZONE%% + + false + + + false + false diff --git a/confluent_osdeploy/suse15/profiles/server/initprofile.sh b/confluent_osdeploy/suse15/profiles/server/initprofile.sh index 9c6c295e..62a2663e 100644 --- a/confluent_osdeploy/suse15/profiles/server/initprofile.sh +++ b/confluent_osdeploy/suse15/profiles/server/initprofile.sh @@ -1,4 +1,7 @@ #!/bin/sh +# WARNING +# be careful when editing files here as this script is called +# in parallel to other copy operations, so changes to files can be lost discnum=$(basename $1) if [ "$discnum" != 1 ]; then exit 0; fi if [ -e $2/boot/kernel ]; then exit 0; fi diff --git a/confluent_osdeploy/suse15/profiles/server/scripts/post.d/10-remove-online-repos.sh b/confluent_osdeploy/suse15/profiles/server/scripts/post.d/10-remove-online-repos.sh new file mode 100644 index 00000000..9ae8224e --- /dev/null +++ b/confluent_osdeploy/suse15/profiles/server/scripts/post.d/10-remove-online-repos.sh @@ -0,0 +1,3 @@ +#!/usr/bin/bash +# remove online repos +grep -lE "baseurl=https?://download.opensuse.org" /etc/zypp/repos.d/*repo | xargs rm -- From 7b89054b35e63728755c51035ef86338ded391a1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 1 Dec 2023 15:55:17 -0500 Subject: [PATCH 120/126] Fix a few noderange abbreviations Also, add some test cases on abbreviation to help sanity check things in the future. 
--- confluent_server/confluent/noderange.py | 46 ++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/noderange.py b/confluent_server/confluent/noderange.py index df4552b8..cf99dd72 100644 --- a/confluent_server/confluent/noderange.py +++ b/confluent_server/confluent/noderange.py @@ -96,6 +96,7 @@ class Bracketer(object): txtnums = getnumbers_nodename(nodename) nums = [int(x) for x in txtnums] for n in range(self.count): + # First pass to see if we have exactly one different number padto = len(txtnums[n]) needpad = (padto != len('{}'.format(nums[n]))) if self.sequences[n] is None: @@ -105,7 +106,24 @@ class Bracketer(object): elif self.sequences[n][2] == nums[n] and self.numlens[n][1] == padto: continue # new nodename has no new number, keep going else: # if self.sequences[n][2] != nums[n] or : - if self.diffn is not None and (n != self.diffn or + if self.diffn is not None and (n != self.diffn or + (padto < self.numlens[n][1]) or + (needpad and padto != self.numlens[n][1])): + self.flush_current() + self.sequences[n] = [[], nums[n], nums[n]] + self.numlens[n] = [padto, padto] + self.diffn = n + for n in range(self.count): + padto = len(txtnums[n]) + needpad = (padto != len('{}'.format(nums[n]))) + if self.sequences[n] is None: + # We initialize to text pieces, 'currstart', and 'prev' number + self.sequences[n] = [[], nums[n], nums[n]] + self.numlens[n] = [len(txtnums[n]), len(txtnums[n])] + elif self.sequences[n][2] == nums[n] and self.numlens[n][1] == padto: + continue # new nodename has no new number, keep going + else: # if self.sequences[n][2] != nums[n] or : + if self.diffn is not None and (n != self.diffn or (padto < self.numlens[n][1]) or (needpad and padto != self.numlens[n][1])): self.flush_current() @@ -449,3 +467,29 @@ class NodeRange(object): if self.cfm is None: return set([element]) raise Exception(element + ' not a recognized node, group, or alias') + +if __name__ == '__main__': + cases = [ 
+ (['r3u4', 'r5u6'], 'r3u4,r5u6'), # should not erroneously gather + (['r3u4s1', 'r5u6s3'], 'r3u4s1,r5u6s3'), # should not erroneously gather + (['r3u4s1', 'r3u4s2', 'r5u4s3'], 'r3u4s[1:2],r5u4s3'), # should not erroneously gather + (['r3u4', 'r3u5', 'r3u6', 'r3u9', 'r4u1'], 'r3u[4:6,9],r4u1'), + (['n01', 'n2', 'n03'], 'n01,n2,n03'), + (['n7', 'n8', 'n09', 'n10', 'n11', 'n12', 'n13', 'n14', 'n15', 'n16', + 'n17', 'n18', 'n19', 'n20'], 'n[7:8],n[09:20]') + ] + for case in cases: + gc = case[0] + bracketer = Bracketer(gc[0]) + for chnk in gc[1:]: + bracketer.extend(chnk) + br = bracketer.range + resnodes = NodeRange(br).nodes + if set(resnodes) != set(gc): + print('FAILED: ' + repr(sorted(gc))) + print('RESULT: ' + repr(sorted(resnodes))) + print('EXPECTED: ' + repr(case[1])) + print('ACTUAL: ' + br) + + + From 7aef012a42f6859df8e8bbece7f208dc568935d8 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 5 Dec 2023 14:39:36 -0500 Subject: [PATCH 121/126] Correct string join syntax in confignet --- confluent_osdeploy/common/profile/scripts/confignet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_osdeploy/common/profile/scripts/confignet b/confluent_osdeploy/common/profile/scripts/confignet index 44eb32ed..8cda6c83 100644 --- a/confluent_osdeploy/common/profile/scripts/confignet +++ b/confluent_osdeploy/common/profile/scripts/confignet @@ -379,9 +379,9 @@ class NetworkManager(object): elif ':' in dnsip: dns6.append(dnsip) if dns4: - cmdargs['ipv4.dns'] = dns4.join(',') + cmdargs['ipv4.dns'] = ','.join(dns4) if dns6: - cmdargs['ipv6.dns'] = dns6.join(',') + cmdargs['ipv6.dns'] = ','.join(dns6) if len(cfg['interfaces']) > 1: # team time.. should be.. 
if not cfg['settings'].get('team_mode', None): sys.stderr.write("Warning, multiple interfaces ({0}) without a team_mode, skipping setup\n".format(','.join(cfg['interfaces']))) From 93269a05ebb66c4b97ab484402ce55808fda2101 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 6 Dec 2023 17:06:09 -0500 Subject: [PATCH 122/126] Fix cloning with ipv6 and EL9 --- .../profiles/default/scripts/firstboot.sh | 11 +++++++++-- .../el9-diskless/profiles/default/scripts/post.sh | 11 +++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/firstboot.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/firstboot.sh index 2bab4136..ed11d9e7 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/firstboot.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/firstboot.sh @@ -9,9 +9,16 @@ HOME=$(getent passwd $(whoami)|cut -d: -f 6) export HOME nodename=$(grep ^NODENAME /etc/confluent/confluent.info|awk '{print $2}') confluent_apikey=$(cat /etc/confluent/confluent.apikey) -confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg|awk '{print $2}') +confluent_mgr=$(grep ^deploy_server_v6: /etc/confluent/confluent.deploycfg|awk '{print $2}') +if [ -z "$confluent_mgr" ] || [ "$confluent_mgr" == "null" ] || ! ping -c 1 $confluent_mgr >& /dev/null; then + confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg|awk '{print $2}') +fi +confluent_websrv=$confluent_mgr +if [[ "$confluent_mgr" == *:* ]]; then + confluent_websrv="[$confluent_mgr]" +fi confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') -export nodename confluent_mgr confluent_profile +export nodename confluent_mgr confluent_profile confluent_websrv . 
/etc/confluent/functions ( exec >> /var/log/confluent/confluent-firstboot.log diff --git a/confluent_osdeploy/el9-diskless/profiles/default/scripts/post.sh b/confluent_osdeploy/el9-diskless/profiles/default/scripts/post.sh index 3a52d128..3b20a946 100644 --- a/confluent_osdeploy/el9-diskless/profiles/default/scripts/post.sh +++ b/confluent_osdeploy/el9-diskless/profiles/default/scripts/post.sh @@ -5,9 +5,16 @@ nodename=$(grep ^NODENAME /etc/confluent/confluent.info|awk '{print $2}') confluent_apikey=$(cat /etc/confluent/confluent.apikey) -confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg|awk '{print $2}') confluent_profile=$(grep ^profile: /etc/confluent/confluent.deploycfg|awk '{print $2}') -export nodename confluent_mgr confluent_profile +confluent_mgr=$(grep ^deploy_server_v6: /etc/confluent/confluent.deploycfg|awk '{print $2}') +if [ -z "$confluent_mgr" ] || [ "$confluent_mgr" == "null" ] || ! ping -c 1 $confluent_mgr >& /dev/null; then + confluent_mgr=$(grep ^deploy_server: /etc/confluent/confluent.deploycfg|awk '{print $2}') +fi +confluent_websrv=$confluent_mgr +if [[ "$confluent_mgr" == *:* ]]; then + confluent_websrv="[$confluent_mgr]" +fi +export nodename confluent_mgr confluent_profile confluent_websrv . /etc/confluent/functions mkdir -p /var/log/confluent chmod 700 /var/log/confluent From 85629dea64d202c02a5f99191b4535b7743a1e03 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 7 Dec 2023 14:44:42 -0500 Subject: [PATCH 123/126] Prevent unitiailized collective info When doing proxyconsole, don't land in a useless retach loop when managerinfo is None. 
--- confluent_server/confluent/consoleserver.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/confluent_server/confluent/consoleserver.py b/confluent_server/confluent/consoleserver.py index 37274792..fb607a27 100644 --- a/confluent_server/confluent/consoleserver.py +++ b/confluent_server/confluent/consoleserver.py @@ -622,6 +622,8 @@ def connect_node(node, configmanager, username=None, direct=True, width=80, myname = collective.get_myname() if myc and myc != collective.get_myname() and direct: minfo = configmodule.get_collective_member(myc) + if not minfo: + raise Exception('Unable to get collective member for {}'.format(node)) return ProxyConsole(node, minfo, myname, configmanager, username, width, height) consk = (node, configmanager.tenant) From b0e23121a84c9b8d980e5bb0cd4f3edc19f2b325 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 3 Jan 2024 15:03:49 -0500 Subject: [PATCH 124/126] Add stub resize handler For uninitialized console handlers, provide a stub to do nothing on resize. This avoids such a request crashing a shared websocket session. 
--- confluent_server/confluent/consoleserver.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/confluent_server/confluent/consoleserver.py b/confluent_server/confluent/consoleserver.py index fb607a27..ebfd8c97 100644 --- a/confluent_server/confluent/consoleserver.py +++ b/confluent_server/confluent/consoleserver.py @@ -175,6 +175,9 @@ class ConsoleHandler(object): self.connectstate = 'connecting' eventlet.spawn(self._connect) + def resize(self, width, height): + return None + def _get_retry_time(self): clustsize = len(self.cfgmgr._cfgstore['nodes']) self._retrytime = self._retrytime * 2 + 1 From 39c00323b391d1ef0e83aad60fd5f543b68702bd Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 3 Jan 2024 15:58:24 -0500 Subject: [PATCH 125/126] Fix error where layout would bail if a partial error were encountered --- confluent_server/confluent/plugins/info/layout.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/confluent_server/confluent/plugins/info/layout.py b/confluent_server/confluent/plugins/info/layout.py index 8397af7f..76b07ac7 100644 --- a/confluent_server/confluent/plugins/info/layout.py +++ b/confluent_server/confluent/plugins/info/layout.py @@ -93,6 +93,9 @@ def retrieve(nodes, element, configmanager, inputdata): '/noderange/{0}/description'.format(needheight), 'retrieve', configmanager, inputdata=None): + if not hasattr(rsp, 'kvpairs'): + results['errors'].append((rsp.node, rsp.error)) + continue kvp = rsp.kvpairs for node in kvp: allnodedata[node]['height'] = kvp[node]['height'] From 4d639081645339ca9dcee1c5922eeb489172e9c1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 4 Jan 2024 11:17:02 -0500 Subject: [PATCH 126/126] Have a fallback height of 1 for any missing height --- confluent_server/confluent/plugins/info/layout.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/confluent_server/confluent/plugins/info/layout.py b/confluent_server/confluent/plugins/info/layout.py index 76b07ac7..ca7f120c 100644 --- 
a/confluent_server/confluent/plugins/info/layout.py +++ b/confluent_server/confluent/plugins/info/layout.py @@ -99,5 +99,8 @@ def retrieve(nodes, element, configmanager, inputdata): kvp = rsp.kvpairs for node in kvp: allnodedata[node]['height'] = kvp[node]['height'] + for node in allnodedata: + if 'height' not in allnodedata[node]: + allnodedata[node]['height'] = 1 yield msg.Generic(results)