From a3b0b0240dc5ef048e9cce8d077552aed1961279 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 27 Sep 2018 10:30:30 -0400 Subject: [PATCH 01/20] Abort assimilation attempt on non-member cleanly If a confluent instance has forgotten the collective, more cleanly handle the situation, and abort the assimilation rather than assuming the peer should be leader, unless txcount specifically is called out as the reason. --- confluent_server/confluent/collective/manager.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/collective/manager.py b/confluent_server/confluent/collective/manager.py index 735d9b1b..dae9279c 100644 --- a/confluent_server/confluent/collective/manager.py +++ b/confluent_server/confluent/collective/manager.py @@ -322,6 +322,11 @@ def handle_connection(connection, cert, request, local=False): if 'assimilate' == operation: drone = request['name'] droneinfo = cfm.get_collective_member(drone) + if not droneinfo: + tlvdata.send(connection, + {'error': 'Unrecognized leader, ' + 'redo invitation process'}) + return if not util.cert_matches(droneinfo['fingerprint'], cert): tlvdata.send(connection, {'error': 'Invalid certificate, ' @@ -432,7 +437,7 @@ def try_assimilate(drone): tlvdata.recv(remote) # the banner tlvdata.recv(remote) # authpassed... 0.. answer = tlvdata.recv(remote) - if answer and 'error' in answer: + if answer and 'txcount' in answer: connect_to_leader(None, None, leader=remote.getpeername()[0]) def get_leader(connection): From 0724ad812b924ec433fa48a6a90573cca31bf79b Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 27 Sep 2018 10:38:00 -0400 Subject: [PATCH 02/20] Add logging to the assimilation phase of collective When attempting assimilation, provide logging about the attempt. 
--- .../confluent/collective/manager.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/collective/manager.py b/confluent_server/confluent/collective/manager.py index dae9279c..9c864bfc 100644 --- a/confluent_server/confluent/collective/manager.py +++ b/confluent_server/confluent/collective/manager.py @@ -18,6 +18,7 @@ import base64 import confluent.collective.invites as invites import confluent.config.configmanager as cfm import confluent.exceptions as exc +import confluent.log as log import confluent.tlvdata as tlvdata import confluent.util as util import eventlet @@ -437,8 +438,24 @@ def try_assimilate(drone): tlvdata.recv(remote) # the banner tlvdata.recv(remote) # authpassed... 0.. answer = tlvdata.recv(remote) - if answer and 'txcount' in answer: + if not answer: + log.log( + {'error': + 'No answer from {0} while trying to assimilate'.format( + drone)}) + return + if 'txcount' in answer: + log.log({'info': 'Deferring to {0} due to transaction count'.format( + drone)}) connect_to_leader(None, None, leader=remote.getpeername()[0]) + return + if 'error' in answer: + log.log({ + 'error': 'Error encountered while attempting to ' + 'assimilate {0}: {1}'.format(drone, answer['error'])}) + return + log.log({'Assimilated {0} into collective'.format(drone)}) + def get_leader(connection): if currentleader is None or connection.getpeername()[0] == currentleader: From 10ce7a9de91839493eeb3dce0a988b84af8ce9f1 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 27 Sep 2018 10:49:57 -0400 Subject: [PATCH 03/20] Add more logging to collective process --- .../confluent/collective/manager.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/collective/manager.py b/confluent_server/confluent/collective/manager.py index 9c864bfc..d4a4cdbf 100644 --- a/confluent_server/confluent/collective/manager.py +++ 
b/confluent_server/confluent/collective/manager.py @@ -62,7 +62,9 @@ def connect_to_leader(cert=None, name=None, leader=None): leader = currentleader try: remote = connect_to_collective(cert, leader) - except socket.error: + except socket.error as e: + log.log({'error': 'Collective connection attempt to {0} failed: {1}' + ''.format(leader, str(e))}) return False with connecting: with cfginitlock: @@ -78,10 +80,16 @@ def connect_to_leader(cert=None, name=None, leader=None): return False if 'error' in keydata: if 'backoff' in keydata: + log.log({ + 'info': 'Collective initialization in progress on ' + '{0}, will retry connection'.format(leader)}) eventlet.spawn_after(random.random(), connect_to_leader, cert, name, leader) return True if 'leader' in keydata: + log.log( + {'info': 'Prospective leader {0} has redirected this ' + 'member to {1}'.format(leader, keydata['leader'])}) ldrc = cfm.get_collective_member_by_address( keydata['leader']) if ldrc and ldrc['name'] == name: @@ -89,6 +97,10 @@ def connect_to_leader(cert=None, name=None, leader=None): return connect_to_leader(name=name, leader=keydata['leader']) if 'txcount' in keydata: + log.log({'info': + 'Prospective leader {0} has inferior ' + 'transaction count, becoming leader' + ''.format(leader)}) return become_leader(remote) print(keydata['error']) return False @@ -96,6 +108,7 @@ def connect_to_leader(cert=None, name=None, leader=None): follower.kill() cfm.stop_following() follower = None + log.log({'info': 'Following leader {0}'.format(leader)}) colldata = tlvdata.recv(remote) globaldata = tlvdata.recv(remote) dbi = tlvdata.recv(remote) @@ -138,6 +151,8 @@ def follow_leader(remote): try: cfm.follow_channel(remote) finally: + log.log({'info': 'Current leader has disappeared, restarting ' + 'collective membership'}) # The leader has folded, time to startup again... 
cfm.stop_following() currentleader = None From e57cdf9a7b2cc916108a40670d133c209d31b776 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 27 Sep 2018 15:14:45 -0400 Subject: [PATCH 04/20] Add more collective event log handling More detail to analyze how the collective membership is handled. --- confluent_server/confluent/collective/manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/collective/manager.py b/confluent_server/confluent/collective/manager.py index d4a4cdbf..dcb11717 100644 --- a/confluent_server/confluent/collective/manager.py +++ b/confluent_server/confluent/collective/manager.py @@ -60,6 +60,7 @@ def connect_to_leader(cert=None, name=None, leader=None): cfginitlock = threading.RLock() if leader is None: leader = currentleader + log.log({'info': 'Attempting connection to leader {0}'.format(leader)}) try: remote = connect_to_collective(cert, leader) except socket.error as e: @@ -104,7 +105,6 @@ def connect_to_leader(cert=None, name=None, leader=None): return become_leader(remote) print(keydata['error']) return False - if follower is not None: follower.kill() cfm.stop_following() follower = None @@ -485,6 +485,7 @@ def retire_as_leader(): def become_leader(connection): global currentleader global follower + log.log({'info': 'Becoming leader of collective'}) if follower: follower.kill() follower = None From 61e7c90ad1e16c0e5309bfbc92f8747d4de7eefa Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 1 Oct 2018 09:35:02 -0400 Subject: [PATCH 05/20] Do not restart on intentional kill Additionally, add some output to help filter events log --- .../confluent/collective/manager.py | 40 +++++++++++++------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/confluent_server/confluent/collective/manager.py b/confluent_server/confluent/collective/manager.py index dcb11717..5f326591 100644 --- a/confluent_server/confluent/collective/manager.py +++ 
b/confluent_server/confluent/collective/manager.py @@ -25,6 +25,7 @@ import eventlet import eventlet.green.socket as socket import eventlet.green.ssl as ssl import eventlet.green.threading as threading +import greenlet import random try: import OpenSSL.crypto as crypto @@ -51,7 +52,6 @@ class ContextBool(object): connecting = ContextBool() leader_init = ContextBool() - def connect_to_leader(cert=None, name=None, leader=None): global currentleader global cfginitlock @@ -60,12 +60,14 @@ def connect_to_leader(cert=None, name=None, leader=None): cfginitlock = threading.RLock() if leader is None: leader = currentleader - log.log({'info': 'Attempting connection to leader {0}'.format(leader)}) + log.log({'info': 'Attempting connection to leader {0}'.format(leader), + 'subsystem': 'collective'}) try: remote = connect_to_collective(cert, leader) except socket.error as e: log.log({'error': 'Collective connection attempt to {0} failed: {1}' - ''.format(leader, str(e))}) + ''.format(leader, str(e)), + 'subsystem': 'collective'}) return False with connecting: with cfginitlock: @@ -83,14 +85,16 @@ def connect_to_leader(cert=None, name=None, leader=None): if 'backoff' in keydata: log.log({ 'info': 'Collective initialization in progress on ' - '{0}, will retry connection'.format(leader)}) + '{0}, will retry connection'.format(leader), + 'subsystem': 'collective'}) eventlet.spawn_after(random.random(), connect_to_leader, cert, name, leader) return True if 'leader' in keydata: log.log( {'info': 'Prospective leader {0} has redirected this ' - 'member to {1}'.format(leader, keydata['leader'])}) + 'member to {1}'.format(leader, keydata['leader']), + 'subsystem': 'collective'}) ldrc = cfm.get_collective_member_by_address( keydata['leader']) if ldrc and ldrc['name'] == name: @@ -108,7 +112,8 @@ def connect_to_leader(cert=None, name=None, leader=None): follower.kill() cfm.stop_following() follower = None - log.log({'info': 'Following leader {0}'.format(leader)}) + log.log({'info': 
'Following leader {0}'.format(leader), + 'subsystem': 'collective'}) colldata = tlvdata.recv(remote) globaldata = tlvdata.recv(remote) dbi = tlvdata.recv(remote) @@ -148,11 +153,18 @@ def connect_to_leader(cert=None, name=None, leader=None): def follow_leader(remote): global currentleader + cleanexit = False try: cfm.follow_channel(remote) + except greenlet.GreenletExit: + cleanexit = True finally: + if cleanexit: + log.log({'info': 'Previous following cleanly closed', + 'subsystem': 'collective'}) + return log.log({'info': 'Current leader has disappeared, restarting ' - 'collective membership'}) + 'collective membership', 'subsystem': 'collective'}) # The leader has folded, time to startup again... cfm.stop_following() currentleader = None @@ -457,19 +469,22 @@ def try_assimilate(drone): log.log( {'error': 'No answer from {0} while trying to assimilate'.format( - drone)}) + drone), + 'subsystem': 'collective'}) return if 'txcount' in answer: log.log({'info': 'Deferring to {0} due to transaction count'.format( - drone)}) + drone), 'subsystem': 'collective'}) connect_to_leader(None, None, leader=remote.getpeername()[0]) return if 'error' in answer: log.log({ 'error': 'Error encountered while attempting to ' - 'assimilate {0}: {1}'.format(drone, answer['error'])}) + 'assimilate {0}: {1}'.format(drone, answer['error']), + 'subsystem': 'collective'}) return - log.log({'Assimilated {0} into collective'.format(drone)}) + log.log({'info': 'Assimilated {0} into collective'.format(drone), + 'subsystem': 'collective'}) def get_leader(connection): @@ -485,7 +500,8 @@ def retire_as_leader(): def become_leader(connection): global currentleader global follower - log.log({'info': 'Becoming leader of collective'}) + log.log({'info': 'Becoming leader of collective', + 'subsystem': 'collective'}) if follower: follower.kill() follower = None From e098c0ba91fbdc2031aad5821127d379d6c00234 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 1 Oct 2018 14:03:45 -0400 Subject: [PATCH 
06/20] Fix missing tenant argument on user management function The tenant was omitted preventing those particular rpc calls from working correctly. --- confluent_server/confluent/config/configmanager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index d5e7d1e5..e9258eb4 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -1232,9 +1232,9 @@ class ConfigManager(object): def del_user(self, name): if cfgleader: - return exec_on_leader('_rpc_master_del_user', name) + return exec_on_leader('_rpc_master_del_user', self.tenant, name) if cfgstreams: - exec_on_followers('_rpc_del_user', name) + exec_on_followers('_rpc_del_user', self.tenant, name) self._true_del_user(name) def _true_del_user(self, name): From 326f56219b0d4128b86d24c8d7b64f1a370a8b24 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 1 Oct 2018 14:40:02 -0400 Subject: [PATCH 07/20] Fix /networking/macs/by-mac The module apimacmap was not correctly scoped. --- confluent_server/confluent/networking/macmap.py | 1 + 1 file changed, 1 insertion(+) diff --git a/confluent_server/confluent/networking/macmap.py b/confluent_server/confluent/networking/macmap.py index 933a873e..ec4759c6 100644 --- a/confluent_server/confluent/networking/macmap.py +++ b/confluent_server/confluent/networking/macmap.py @@ -297,6 +297,7 @@ def _finish_update(completions): def _full_updatemacmap(configmanager): global vintage + global _apimacmap global _macmap global _nodesbymac global _switchportmap From b0b5493ff7db65d0f21a4ce0468251b42e2cb626 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 1 Oct 2018 15:29:18 -0400 Subject: [PATCH 08/20] Cancel retry if we become leader If an instance is first to start, it's retry should be canceled when other members prod it to become leader. 
--- confluent_server/confluent/collective/manager.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/confluent_server/confluent/collective/manager.py b/confluent_server/confluent/collective/manager.py index 5f326591..bf851376 100644 --- a/confluent_server/confluent/collective/manager.py +++ b/confluent_server/confluent/collective/manager.py @@ -500,11 +500,15 @@ def retire_as_leader(): def become_leader(connection): global currentleader global follower + global retrythread log.log({'info': 'Becoming leader of collective', 'subsystem': 'collective'}) if follower: follower.kill() follower = None + if retrythread: + retrythread.cancel() + retrythread = None currentleader = connection.getsockname()[0] skipaddr = connection.getpeername()[0] myname = get_myname() From 4329c1d38851e4d2a107ee634a39f582da299284 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 1 Oct 2018 15:50:31 -0400 Subject: [PATCH 09/20] Have collective start bail out if leader Leader should not relinquish if quorum, so don't bother in such a case. --- confluent_server/confluent/collective/manager.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/confluent_server/confluent/collective/manager.py b/confluent_server/confluent/collective/manager.py index bf851376..70275d66 100644 --- a/confluent_server/confluent/collective/manager.py +++ b/confluent_server/confluent/collective/manager.py @@ -535,6 +535,12 @@ def start_collective(): if follower: follower.kill() follower = None + try: + if cfm.cfgstreams and cfm.check_quorum(): + # Do not start if we have quorum and are leader + return + except exc.DegradedCollective: + pass if leader_init.active: # do not start trying to connect if we are # xmitting data to a follower return From 78a1741e0e91b3297d8754eb6c231a048ed62dc0 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 1 Oct 2018 16:02:16 -0400 Subject: [PATCH 10/20] Fix usage of check_quorum() It is not a boolean, it is exception driven. 
--- confluent_server/confluent/collective/manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/confluent_server/confluent/collective/manager.py b/confluent_server/confluent/collective/manager.py index 70275d66..33b87323 100644 --- a/confluent_server/confluent/collective/manager.py +++ b/confluent_server/confluent/collective/manager.py @@ -536,7 +536,8 @@ def start_collective(): follower.kill() follower = None try: - if cfm.cfgstreams and cfm.check_quorum(): + if cfm.cfgstreams: + cfm.check_quorum() # Do not start if we have quorum and are leader return except exc.DegradedCollective: From d86e1fc4eb0d6e67f2c66b3d0d4b42497c9bd28c Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 2 Oct 2018 10:17:44 -0400 Subject: [PATCH 11/20] Give the cfg init a lock Move collective manager and configmanager to share a configinitlock, so that bad timings during internal initialization and collective activity cannot interfere and produce corrupt database. This became an issue with the fix for 'everything' disappearing. 
--- .../confluent/collective/manager.py | 9 +--- .../confluent/config/configmanager.py | 54 ++++++++++--------- 2 files changed, 29 insertions(+), 34 deletions(-) diff --git a/confluent_server/confluent/collective/manager.py b/confluent_server/confluent/collective/manager.py index 33b87323..09961698 100644 --- a/confluent_server/confluent/collective/manager.py +++ b/confluent_server/confluent/collective/manager.py @@ -35,7 +35,6 @@ except ImportError: crypto = None currentleader = None -cfginitlock = None follower = None retrythread = None @@ -54,10 +53,7 @@ leader_init = ContextBool() def connect_to_leader(cert=None, name=None, leader=None): global currentleader - global cfginitlock global follower - if cfginitlock is None: - cfginitlock = threading.RLock() if leader is None: leader = currentleader log.log({'info': 'Attempting connection to leader {0}'.format(leader), @@ -70,7 +66,7 @@ def connect_to_leader(cert=None, name=None, leader=None): 'subsystem': 'collective'}) return False with connecting: - with cfginitlock: + with cfm._initlock: tlvdata.recv(remote) # the banner tlvdata.recv(remote) # authpassed... 0.. 
if name is None: @@ -520,13 +516,10 @@ def become_leader(connection): def startup(): - global cfginitlock members = list(cfm.list_collective()) if len(members) < 2: # Not in collective mode, return return - if cfginitlock is None: - cfginitlock = threading.RLock() eventlet.spawn_n(start_collective) def start_collective(): diff --git a/confluent_server/confluent/config/configmanager.py b/confluent_server/confluent/config/configmanager.py index e9258eb4..0e105c29 100644 --- a/confluent_server/confluent/config/configmanager.py +++ b/confluent_server/confluent/config/configmanager.py @@ -82,6 +82,7 @@ _dirtylock = threading.RLock() _leaderlock = gthread.RLock() _synclock = threading.RLock() _rpclock = gthread.RLock() +_initlock = gthread.RLock() _followerlocks = {} _config_areas = ('nodegroups', 'nodes', 'usergroups', 'users') tracelog = None @@ -953,38 +954,39 @@ class ConfigManager(object): def __init__(self, tenant, decrypt=False, username=None): global _cfgstore - if _cfgstore is None: - init() - self.decrypt = decrypt - self.current_user = username - if tenant is None: - self.tenant = None - if 'main' not in _cfgstore: - _cfgstore['main'] = {} + with _initlock: + if _cfgstore is None: + init() + self.decrypt = decrypt + self.current_user = username + if tenant is None: + self.tenant = None + if 'main' not in _cfgstore: + _cfgstore['main'] = {} + self._bg_sync_to_file() + self._cfgstore = _cfgstore['main'] + if 'nodegroups' not in self._cfgstore: + self._cfgstore['nodegroups'] = {'everything': {'nodes': set()}} + _mark_dirtykey('nodegroups', 'everything', self.tenant) + self._bg_sync_to_file() + if 'nodes' not in self._cfgstore: + self._cfgstore['nodes'] = {} + self._bg_sync_to_file() + return + elif 'tenant' not in _cfgstore: + _cfgstore['tenant'] = {tenant: {}} self._bg_sync_to_file() - self._cfgstore = _cfgstore['main'] + elif tenant not in _cfgstore['tenant']: + _cfgstore['tenant'][tenant] = {} + self._bg_sync_to_file() + self.tenant = tenant + self._cfgstore = 
_cfgstore['tenant'][tenant] if 'nodegroups' not in self._cfgstore: - self._cfgstore['nodegroups'] = {'everything': {'nodes': set()}} + self._cfgstore['nodegroups'] = {'everything': {}} _mark_dirtykey('nodegroups', 'everything', self.tenant) - self._bg_sync_to_file() if 'nodes' not in self._cfgstore: self._cfgstore['nodes'] = {} - self._bg_sync_to_file() - return - elif 'tenant' not in _cfgstore: - _cfgstore['tenant'] = {tenant: {}} self._bg_sync_to_file() - elif tenant not in _cfgstore['tenant']: - _cfgstore['tenant'][tenant] = {} - self._bg_sync_to_file() - self.tenant = tenant - self._cfgstore = _cfgstore['tenant'][tenant] - if 'nodegroups' not in self._cfgstore: - self._cfgstore['nodegroups'] = {'everything': {}} - _mark_dirtykey('nodegroups', 'everything', self.tenant) - if 'nodes' not in self._cfgstore: - self._cfgstore['nodes'] = {} - self._bg_sync_to_file() def get_collective_member(self, name): return get_collective_member(name) From 10cb1b77dd1e8ebd9b1dc9068da763afbd6a3e84 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 2 Oct 2018 14:36:07 -0400 Subject: [PATCH 12/20] Extend the nodeboot man page --- confluent_client/doc/man/nodeboot.ronn | 33 ++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/confluent_client/doc/man/nodeboot.ronn b/confluent_client/doc/man/nodeboot.ronn index 9b067556..c54d67f7 100644 --- a/confluent_client/doc/man/nodeboot.ronn +++ b/confluent_client/doc/man/nodeboot.ronn @@ -4,12 +4,41 @@ nodeboot(8) -- Reboot a confluent node to a specific device ## SYNOPSIS `nodeboot ` -`nodeboot ` [net|setup] +`nodeboot [options] ` [default|cd|network|setup|hd] ## DESCRIPTION **nodeboot** reboots nodes in a noderange. If an additional argument is given, -it sets the node to specifically boot to that as the next boot. +it sets the node to specifically boot to that as the next boot. This +performs an immediate reboot without waiting for the OS. 
To set the boot +device without inducing a reboot, see the `nodesetboot` command. + +## OPTIONS + +* `-b`, `--bios`: + For a system that supports both BIOS and UEFI style boot, request BIOS style + boot if supported (some platforms will UEFI boot with this flag anyway). + +* `-p`, `--persist`: + For a system that supports it, mark the boot override to persist rather than + be a one time change. Many systems do not support this functionality. + +* `default`: + Request a normal default boot with no particular device override + +* `cd`: + Request boot from media. Note that this can include physical CD, + remote media mounted as CD/DVD, and detachable hard disks drives such as usb + key devices. + +* `network`: + Request boot to network + +* `setup`: + Request to enter the firmware configuration menu (e.g. F1 setup) on next boot. + +* `hd`: + Boot straight to hard disk drive ## EXAMPLES * Booting n3 and n4 to the default boot behavior: From 15dc4937eef9191652c784750d61a3a13d029706 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Tue, 2 Oct 2018 14:44:25 -0400 Subject: [PATCH 13/20] Add hyphen options to various completion --- confluent_client/confluent_env.sh | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/confluent_client/confluent_env.sh b/confluent_client/confluent_env.sh index f81bf3df..685b685a 100644 --- a/confluent_client/confluent_env.sh +++ b/confluent_client/confluent_env.sh @@ -51,8 +51,8 @@ _confluent_get_args() _confluent_nodeidentify_completion() { _confluent_get_args - if [ $NUMARGS == 3 ]; then - COMPREPLY=($(compgen -W "on off" -- ${COMP_WORDS[-1]})) + if [ $NUMARGS -ge 3 ]; then + COMPREPLY=($(compgen -W "on off -h" -- ${COMP_WORDS[-1]})) fi if [ $NUMARGS -lt 3 ]; then _confluent_nr_completion @@ -64,8 +64,8 @@ _confluent_nodeidentify_completion() _confluent_nodesetboot_completion() { _confluent_get_args - if [ $NUMARGS == 3 ]; then - COMPREPLY=($(compgen -W "default cd network setup hd" -- 
${COMP_WORDS[-1]})) + if [ $NUMARGS -ge 3 ]; then + COMPREPLY=($(compgen -W "default cd network setup hd -h -b -p" -- ${COMP_WORDS[-1]})) fi if [ $NUMARGS -lt 3 ]; then _confluent_nr_completion @@ -76,12 +76,8 @@ _confluent_nodesetboot_completion() _confluent_nodepower_completion() { _confluent_get_args - if [ "${CMPARGS[-1]:0:1}" == '-' ]; then - COMPREPLY=($(compgen -W "-h -p" -- ${COMP_WORDS[-1]})) - return - fi - if [ $NUMARGS == 3 ]; then - COMPREPLY=($(compgen -W "boot off on status" -- ${COMP_WORDS[-1]})) + if [ $NUMARGS -ge 3 ]; then + COMPREPLY=($(compgen -W "boot off on status -h -p" -- ${COMP_WORDS[-1]})) return; fi if [ $NUMARGS -lt 3 ]; then @@ -93,14 +89,14 @@ _confluent_nodepower_completion() _confluent_nodemedia_completion() { _confluent_get_args - if [ $NUMARGS == 3 ]; then - COMPREPLY=($(compgen -W "list upload attach detachall" -- ${COMP_WORDS[-1]})) - return; - fi - if [ $NUMARGS -gt 3 ] && [ ${CMPARGS[2]} == 'upload' ]; then + if [ $NUMARGS -gt 3 ] && [ ${CMPARGS[-2]} == 'upload' ]; then compopt -o default COMPREPLY=() return + fi + if [ $NUMARGS --ge 3 ]; then + COMPREPLY=($(compgen -W "list upload attach detachall -h" -- ${COMP_WORDS[-1]})) + return; fi if [ $NUMARGS -lt 3 ]; then _confluent_nr_completion From 2f616d4586df522df7834aada87ff84e149ca073 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 3 Oct 2018 16:23:20 -0400 Subject: [PATCH 14/20] Better error when collective.manager is set to something invalid If the collective.manager field does not correspond to any collective manager, give a useful error rather than unexpected error. 
--- confluent_server/confluent/core.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/core.py b/confluent_server/confluent/core.py index bb0fb72b..cc469661 100644 --- a/confluent_server/confluent/core.py +++ b/confluent_server/confluent/core.py @@ -859,8 +859,16 @@ def dispatch_request(nodes, manager, element, configmanager, inputdata, certfile='/etc/confluent/srvcert.pem') except Exception: for node in nodes: - yield msg.ConfluentResourceUnavailable( - node, 'Collective member {0} is unreachable'.format(a['name'])) + if a: + yield msg.ConfluentResourceUnavailable( + node, 'Collective member {0} is unreachable'.format( + a['name'])) + else: + yield msg.ConfluentResourceUnavailable( + node, + '"{0}" is not recognized as a collective member'.format( + manager)) + return if not util.cert_matches(a['fingerprint'], remote.getpeercert( binary_form=True)): From 32602fbba3ae6d344fd7ba97d9774eedec8830d9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 4 Oct 2018 10:23:55 -0400 Subject: [PATCH 15/20] Provide interactive handling of key mismatch in ssh sessions Before, ssh would close without so much as a warning, fix this by dealing with the key data. 
--- confluent_server/confluent/exceptions.py | 1 + .../confluent/plugins/shell/ssh.py | 39 ++++++++++++++++++- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/confluent_server/confluent/exceptions.py b/confluent_server/confluent/exceptions.py index a2c67b68..80f5337e 100644 --- a/confluent_server/confluent/exceptions.py +++ b/confluent_server/confluent/exceptions.py @@ -106,6 +106,7 @@ class PubkeyInvalid(ConfluentException): super(PubkeyInvalid, self).__init__(self, text) self.fingerprint = fingerprint self.attrname = attribname + self.message = text bodydata = {'message': text, 'event': event, 'fingerprint': fingerprint, diff --git a/confluent_server/confluent/plugins/shell/ssh.py b/confluent_server/confluent/plugins/shell/ssh.py index 9f693b30..2e44e9f8 100644 --- a/confluent_server/confluent/plugins/shell/ssh.py +++ b/confluent_server/confluent/plugins/shell/ssh.py @@ -1,6 +1,6 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 -# Copyright 2015 Lenovo +# Copyright 2015-2018 Lenovo # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -113,13 +113,48 @@ class SshShell(conapi.Console): self.password = '' self.datacallback('\r\nlogin as: ') return + except cexc.PubkeyInvalid as pi: + self.keyaction = '' + self.candidatefprint = pi.fingerprint + self.datacallback(pi.message) + self.keyattrname = pi.attrname + self.datacallback('\r\nNew fingerprint: ' + pi.fingerprint) + self.inputmode = -1 + self.datacallback('\r\nEnter "disconnect" or "accept": ') + return self.inputmode = 2 self.connected = True self.shell = self.ssh.invoke_shell() self.rxthread = eventlet.spawn(self.recvdata) def write(self, data): - if self.inputmode == 0: + if self.inputmode == -1: + while len(data) and data[0] == b'\x7f' and len(self.keyaction): + self.datacallback('\b \b') # erase previously echoed value + self.keyaction = self.keyaction[:-1] + data = data[1:] + while len(data) and data[0] == b'\x7f': + data = data[1:] + while b'\x7f' in data: + delidx = data.index(b'\x7f') + data = data[:delidx - 1] + data[delidx + 1:] + self.keyaction += data + if '\r' in self.keyaction: + action = self.keyaction.split('\r')[0] + if action.lower() == 'accept': + self.nodeconfig.set_node_attributes( + {self.node: + {self.keyattrname: self.candidatefprint}}) + self.datacallback('\r\n') + self.logon() + elif action.lower() == 'disconnect': + self.datacallback(conapi.ConsoleEvent.Disconnect) + else: + self.keyaction = '' + self.datacallback('\r\nEnter "disconnect" or "accept": ') + elif len(data) > 0: + self.datacallback(data) + elif self.inputmode == 0: while len(data) and data[0] == b'\x7f' and len(self.username): self.datacallback('\b \b') # erase previously echoed value self.username = self.username[:-1] From 8d9a0827392a86f29e35ab8724d6a828c56c04a9 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Thu, 4 Oct 2018 14:59:25 -0400 Subject: [PATCH 16/20] Provide better exceptions and propagate them to client on snmp When doing snmp, messages would always go to log only, even if the user was at the confetty cli. 
Give user access to knowing the error impacting the query. --- confluent_server/confluent/networking/lldp.py | 38 ++++++++++++------- confluent_server/confluent/snmputil.py | 13 +++++-- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/confluent_server/confluent/networking/lldp.py b/confluent_server/confluent/networking/lldp.py index a54e7b55..4df205bb 100644 --- a/confluent_server/confluent/networking/lldp.py +++ b/confluent_server/confluent/networking/lldp.py @@ -171,7 +171,7 @@ def _extract_neighbor_data_b(args): args are carried as a tuple, because of eventlet convenience """ - switch, password, user, force = args + switch, password, user, force = args[:4] vintage = _neighdata.get(switch, {}).get('!!vintage', 0) now = util.monotonic_time() if vintage > (now - 60) and not force: @@ -220,17 +220,19 @@ def _extract_neighbor_data_b(args): _neighdata[switch] = lldpdata -def update_switch_data(switch, configmanager, force=False): +def update_switch_data(switch, configmanager, force=False, retexc=False): switchcreds = netutil.get_switchcreds(configmanager, (switch,))[0] - _extract_neighbor_data(switchcreds + (force,)) + ndr = _extract_neighbor_data(switchcreds + (force, retexc)) + if retexc and isinstance(ndr, Exception): + raise ndr return _neighdata.get(switch, {}) -def update_neighbors(configmanager, force=False): - return _update_neighbors_backend(configmanager, force) +def update_neighbors(configmanager, force=False, retexc=False): + return _update_neighbors_backend(configmanager, force, retexc) -def _update_neighbors_backend(configmanager, force): +def _update_neighbors_backend(configmanager, force, retexc): global _neighdata global _neighbypeerid vintage = _neighdata.get('!!vintage', 0) @@ -241,7 +243,7 @@ def _update_neighbors_backend(configmanager, force): _neighbypeerid = {'!!vintage': now} switches = netutil.list_switches(configmanager) switchcreds = netutil.get_switchcreds(configmanager, switches) - switchcreds = [ x + (force,) for x in 
switchcreds] + switchcreds = [ x + (force, retexc) for x in switchcreds] pool = GreenPool(64) for ans in pool.imap(_extract_neighbor_data, switchcreds): yield ans @@ -258,9 +260,15 @@ def _extract_neighbor_data(args): return try: with _updatelocks[switch]: - _extract_neighbor_data_b(args) - except Exception: - log.logtrace() + return _extract_neighbor_data_b(args) + except Exception as e: + yieldexc = False + if len(args) >= 5: + yieldexc = args[4] + if yieldexc: + return e + else: + log.logtrace() if __name__ == '__main__': # a quick one-shot test, args are switch and snmpv1 string for now @@ -327,7 +335,9 @@ def _handle_neighbor_query(pathcomponents, configmanager): # guaranteed if (parms['by-peerid'] not in _neighbypeerid and _neighbypeerid.get('!!vintage', 0) < util.monotonic_time() - 60): - list(update_neighbors(configmanager)) + for x in update_neighbors(configmanager, retexc=True): + if isinstance(x, Exception): + raise x if parms['by-peerid'] not in _neighbypeerid: raise exc.NotFoundException('No matching peer known') return _dump_neighbordatum(_neighbypeerid[parms['by-peerid']]) @@ -336,9 +346,11 @@ def _handle_neighbor_query(pathcomponents, configmanager): if listrequested not in multi_selectors | single_selectors: raise exc.NotFoundException('{0} is not found'.format(listrequested)) if 'by-switch' in parms: - update_switch_data(parms['by-switch'], configmanager) + update_switch_data(parms['by-switch'], configmanager, retexc=True) else: - list(update_neighbors(configmanager)) + for x in update_neighbors(configmanager, retexc=True): + if isinstance(x, Exception): + raise x return list_info(parms, listrequested) diff --git a/confluent_server/confluent/snmputil.py b/confluent_server/confluent/snmputil.py index ae6c39b6..e862afe8 100644 --- a/confluent_server/confluent/snmputil.py +++ b/confluent_server/confluent/snmputil.py @@ -92,12 +92,17 @@ class Session(object): errstr, errnum, erridx, answers = rsp if errstr: errstr = str(errstr) - if errstr in 
('unknownUserName', 'wrongDigest'): - raise exc.TargetEndpointBadCredentials(errstr) + finerr = errstr + ' while trying to connect to ' \ + '{0}'.format(self.server) + if errstr in ('Unknown USM user', 'unknownUserName', + 'wrongDigest', 'Wrong SNMP PDU digest'): + raise exc.TargetEndpointBadCredentials(finerr) # need to do bad credential versus timeout - raise exc.TargetEndpointUnreachable(errstr) + raise exc.TargetEndpointUnreachable(finerr) elif errnum: - raise exc.ConfluentException(errnum.prettyPrint()) + raise exc.ConfluentException(errnum.prettyPrint() + + ' while trying to connect to ' + '{0}'.format(self.server)) for ans in answers: if not obj[0].isPrefixOf(ans[0]): # PySNMP returns leftovers in a bulk command From 73c06fd25eae89d26c7bfeeb802bcd032034c8c2 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 8 Oct 2018 09:54:03 -0400 Subject: [PATCH 17/20] Fix display of error on join of collective --- confluent_server/bin/collective | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/confluent_server/bin/collective b/confluent_server/bin/collective index a6465ef4..5f0c78f6 100644 --- a/confluent_server/bin/collective +++ b/confluent_server/bin/collective @@ -66,8 +66,9 @@ def join_collective(server, invitation): 'invitation': invitation, 'server': server}}) res = tlvdata.recv(s) - print(res.get('collective', - {'status': 'Unknown response: ' + repr(res)})['status']) + res = res.get('collective', + {'status': 'Unknown response: ' + repr(res)}) + print(res.get('status', res['error'])) def show_collective(): From 5baab5bef465d38cd247232a00dafe87b6312c99 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 8 Oct 2018 10:47:38 -0400 Subject: [PATCH 18/20] Add more stateful behavior to completion Allow it to sense words already used in command. Refactor to common code for similar ones. 
--- confluent_client/confluent_env.sh | 56 +++++++++++++++++++------------ 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/confluent_client/confluent_env.sh b/confluent_client/confluent_env.sh index 685b685a..3312002c 100644 --- a/confluent_client/confluent_env.sh +++ b/confluent_client/confluent_env.sh @@ -45,57 +45,69 @@ _confluent_get_args() NUMARGS=${#CMPARGS[@]} if [ "${COMP_WORDS[-1]}" == '' ]; then NUMARGS=$((NUMARGS+1)) + CMPARGS+=("") fi + GENNED="" + for CAND in ${COMP_CANDIDATES[@]}; do + candarray=(${CAND//,/ }) + matched=0 + for c in "${candarray[@]}"; do + for arg in "${CMPARGS[@]}"; do + if [ "$arg" = "$c" ]; then + matched=1 + break + fi + done + done + if [ 0 = $matched ]; then + for c in "${candarray[@]}"; do + GENNED+=" $c" + done + fi + done } -_confluent_nodeidentify_completion() +function _confluent_generic_completion() { _confluent_get_args - if [ $NUMARGS -ge 3 ]; then - COMPREPLY=($(compgen -W "on off -h" -- ${COMP_WORDS[-1]})) + if [ $NUMARGS -ge 3 ] && [ ! 
-z "$GENNED" ]; then + COMPREPLY=($(compgen -W "$GENNED" -- ${COMP_WORDS[-1]})) fi if [ $NUMARGS -lt 3 ]; then _confluent_nr_completion return; fi } +_confluent_nodeidentify_completion() +{ + COMP_CANDIDATES=("on,off -h") + _confluent_generic_completion +} _confluent_nodesetboot_completion() { - _confluent_get_args - if [ $NUMARGS -ge 3 ]; then - COMPREPLY=($(compgen -W "default cd network setup hd -h -b -p" -- ${COMP_WORDS[-1]})) - fi - if [ $NUMARGS -lt 3 ]; then - _confluent_nr_completion - return; - fi + COMP_CANDIDATES=("default,cd,network,setup,hd -h -b -p") + _confluent_generic_completion } _confluent_nodepower_completion() { - _confluent_get_args - if [ $NUMARGS -ge 3 ]; then - COMPREPLY=($(compgen -W "boot off on status -h -p" -- ${COMP_WORDS[-1]})) - return; - fi - if [ $NUMARGS -lt 3 ]; then - _confluent_nr_completion - return; - fi + COMP_CANDIDATES=("boot,off,on,status -h -p") + _confluent_generic_completion } _confluent_nodemedia_completion() { + COMP_CANDIDATES=("list,upload,attach,detachall -h") _confluent_get_args if [ $NUMARGS -gt 3 ] && [ ${CMPARGS[-2]} == 'upload' ]; then compopt -o default COMPREPLY=() return fi - if [ $NUMARGS --ge 3 ]; then - COMPREPLY=($(compgen -W "list upload attach detachall -h" -- ${COMP_WORDS[-1]})) + if [ $NUMARGS -ge 3 ] && [ ! 
-z "$GENNED" ]; then + COMPREPLY=($(compgen -W "$GENNED" -- ${COMP_WORDS[-1]})) return; fi if [ $NUMARGS -lt 3 ]; then From c6b8aaf372356a70fa654686ba0bc6160346de6a Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 8 Oct 2018 10:57:48 -0400 Subject: [PATCH 19/20] Fix mistake in the nodegroupdefine man page --- confluent_client/doc/man/nodegroupdefine.ronn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confluent_client/doc/man/nodegroupdefine.ronn b/confluent_client/doc/man/nodegroupdefine.ronn index cf86c46c..9b2aba8d 100644 --- a/confluent_client/doc/man/nodegroupdefine.ronn +++ b/confluent_client/doc/man/nodegroupdefine.ronn @@ -7,7 +7,7 @@ nodegroupdefine(8) -- Define new confluent node group ## DESCRIPTION -`nodegroupdefine` allows the definition of a new node for the confluent management +`nodegroupdefine` allows the definition of a new nodegroup for the confluent management service. It may only define a single group name at a time. It has the same syntax as `nodegroupattrib(8)`, and the commands differ in that `nodegroupattrib(8)` will error if a node group does not exist. From 20a37f8db53cc5c6b57d32b124274c3b9961b612 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Mon, 8 Oct 2018 11:02:33 -0400 Subject: [PATCH 20/20] Add mention of the -u options to the manpages. --- confluent_client/doc/man/nodeboot.ronn | 3 +++ confluent_client/doc/man/nodesetboot.ronn | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/confluent_client/doc/man/nodeboot.ronn b/confluent_client/doc/man/nodeboot.ronn index c54d67f7..36804b63 100644 --- a/confluent_client/doc/man/nodeboot.ronn +++ b/confluent_client/doc/man/nodeboot.ronn @@ -19,6 +19,9 @@ device without inducing a reboot, see the `nodesetboot` command. For a system that supports both BIOS and UEFI style boot, request BIOS style boot if supported (some platforms will UEFI boot with this flag anyway). 
+* `-u`, `--uefi`: + This flag does nothing, it is for command compatibility with xCAT's rsetboot + * `-p`, `--persist`: For a system that supports it, mark the boot override to persist rather than be a one time change. Many systems do not support this functionality. diff --git a/confluent_client/doc/man/nodesetboot.ronn b/confluent_client/doc/man/nodesetboot.ronn index 0fc881e1..dce303b1 100644 --- a/confluent_client/doc/man/nodesetboot.ronn +++ b/confluent_client/doc/man/nodesetboot.ronn @@ -30,7 +30,10 @@ control. * `-p`, `--persist`: For a system that supports it, mark the boot override to persist rather than be a one time change. Many systems do not support this functionality. - + +* `-u`, `--uefi`: + This flag does nothing, it is for command compatibility with xCAT's rsetboot + * `default`: Request a normal default boot with no particular device override