Continue making progress

2025-08-25 12:40:22 +00:00 · 2013-08-16 16:37:19 -04:00
parent 79259a9c32
commit 71e1607b57
5 changed files with 347 additions and 18 deletions
--- a/confluent/config.py
+++ b/confluent/config.py
@@ -1,10 +1,14 @@
 # Copyright 2013 IBM
 # All rights reserved

-# This would be similar to Table.pm functionality
-# Two backends
-#  simple plain JSON
-#  redis
+
+# Note on the cryptography.  Default behavior is mostly just to pave the
+# way to meaningful security.  Root all potentially sensitive data in
+# one key.  That key is in plain sight, so it is not meaningfully protected.
+# However, the key can be protected in the following ways:
+#   - Passphrase protected (requiring human interaction every restart)
+#   - TPM sealing (which would forgo the interactive step, assuming the
+#           risk of physical attack on the TPM is not a concern)

 # This time around, expression based values will be parsed when set, and the
 # parsing results will be stored rather than parsing on every evaluation
@@ -12,8 +16,240 @@
 # as well as the $1, $2, etc extracted from nodename.  Left hand side can
 # be requested to customize $1 and $2, but it is not required

+#Actually, may override one of the python string formatters:
+#   2.6 string.Formatter, e.g. "hello {world}"
+#   2.4 string.Template, e.g. "hello $world"
+
 # In JSON mode, will just read and write entire thing, with a comment
 # to dissuade people from hand editing.

 # In JSON mode, a file for different categories (site, nodes, etc)
 # in redis, each category is a different database number
+
+import array
+import collections
+import math
+import os
+
+
+from Crypto.Cipher import AES
+from Crypto.Hash import HMAC
+from Crypto.Hash import SHA256
+_masterkey = None
+_masterintegritykey = None
+_cfgstore = {}
+
+def _expand_expression(attribute, nodeobj):
+    # here is where we may avail ourselves of string.Formatter or
+    # string.Template
+    # we would then take the string that is identifier and do
+    # a little ast magic
+    # {(n1+1)/12+1} would take first number from nodename
+    # {enclosure.id * 8} would take enclosure.id value
+    # ast scheme would involve the operator module and ast
+    # modules, with a mapping from ast operator classes to
+    # valid operator functions
+    # ast.parse gives a body array, and value is where we kick off
+    # ast.Num has an 'n' member to give the number
+    # ast.Attribute o
+#>>> import ast
+#>>> b=ast.parse("enclosure.id+n0+1/2")
+#>>> b.body[0].value
+#<_ast.BinOp object at 0x7ff449ff0090>
+#>>> b.body[0].value.op
+#<_ast.Add object at 0x7ff4500faf90>
+#>>> b.body[0].value.left
+#<_ast.BinOp object at 0x7ff449ff00d0>
+#>>> b.body[0].value.left.op
+#<_ast.Add object at 0x7ff4500faf90>
+#>>> b.body[0].value.left.left
+#<_ast.Attribute object at 0x7ff449ff0110>
+#>>> b.body[0].value.left.left.value.id
+#'enclosure'
+#>>> b.body[0].value.left.right
+#<_ast.Name object at 0x7ff449ff0190>
+#>>> b.body[0].value.left.right.id
+#'n0'
+#>>> b.body[0].value.left.left.id
+#Traceback (most recent call last):
+#  File "<stdin>", line 1, in <module>
+#AttributeError: 'Attribute' object has no attribute 'id'
+#>>> b.body[0].value.left.left.attr
+#'id'
+#import ast
+#import operator as op
+# supported operators
+#operators = {ast.Add: op.add, ast.Sub: op.sub, ast.Mult: op.mul,
+#            ast.Div: op.truediv, ast.Pow: op.pow, ast.BitXor: op.xor}
+#def eval_expr(expr):
+#   """
+#   >>> eval_expr('2^6')
+#   4
+#   >>> eval_expr('2**6')
+#   64
+#   >>> eval_expr('1 + 2*3**(4^5) / (6 + -7)')
+#   -5.0
+#   """
+#   return eval_(ast.parse(expr).body[0].value) # Module(body=[Expr(value=...)])
+#def eval_(node):
+#   if isinstance(node, ast.Num): # <number>
+#       return node.n
+#   elif isinstance(node, ast.operator): # <operator>
+#       return operators[type(node)]
+#   elif isinstance(node, ast.BinOp): # <left> <operator> <right>
+#       return eval_(node.op)(eval_(node.left), eval_(node.right))
+#   else:
+#       raise TypeError(node)
+    pass
+
+
+def unlock_config_keys(passphrase=None):
+    _init_masterkey(passphrase)
+
+
+def _pbkdf2(passphrase, salt, iterations, size):
+    blocks = int(math.ceil(size/32.0))  # Hardcoded to SHA256 behavior
+    retkey = ""
+    for block in xrange(blocks):
+        citerations = iterations
+        tsalt = salt + chr(block)
+        currval = HMAC.new(passphrase, tsalt, SHA256).digest()
+        currarray = array.array('L',currval)
+        while citerations > 1:
+            currval = HMAC.new(passphrase, currval).digest()
+            nextarray = array.array('L',currval)
+            for index in range(nextarray):
+                currarray[index] = currarray[index] ^ nextarray[index]
+            currval = currarray.tostring()
+            currarray = nextarray
+            citerations = citerations - 1
+        retkey += currval
+    return retkey[:size]
+
+
+def _derive_keys(passphrase, salt):
+    tmpkey = _pbkdf2(passphrase, salt, 50000, 32)
+    finalkey = _pbkdf2(tmpkey, salt, 50000, 96)
+    return (finalkey[:32],finalkey[32:])
+
+
+def _get_protected_key(keydict, passphrase):
+    if keydict['unencryptedvalue']:
+        return keydict['unencryptedvalue']
+    # TODO(jbjohnso): check for TPM sealing
+    if 'passphraseprotected' in keydict:
+        if passphrase is None:
+            raise Exception("Passphrase protected secret requires passhrase")
+        for pp in keydict['passphraseprotected']:
+            salt = pp[0]
+            privkey, integkey = _derive_keys(passphrase, salt)
+            return _decrypt_value(pp[1:], key=privkey, integritykey=integkey)
+    else:
+        raise Exception("No available decryption key")
+
+
+def _format_key(key, passphrase=None):
+    if passphrase is not None:
+        salt = os.urandom(32)
+        privkey, integkey = _derive_keys(passphrase, salt)
+        cval = _crypt_value(key, key=privkey, integritykey=integkey)
+        return {"passphraseprotected": cval}
+    else:
+        return {"unencryptedvalue": key}
+
+
+def _init_masterkey(passphrase=None):
+    if 'master_privacy_key' in _cfgstore['globals']:
+        _masterkey = _get_protected_key(
+            _cfgstore['globals']['master_privacy_key'],
+            passphrase=passphrase)
+    else:
+        _masterkey = os.urandom(32)
+        _cfgstore['globals']['master_privacy_key'] = _format_key(_masterkey,
+            passphrase=passphrase)
+    if 'master_integrity_key' in _cfgstore['globals']:
+        _masterintegritykey = _get_protected_key(
+            _cfgstore['globals']['master_integrity_key'],
+            passphrase=passphrase
+            )
+    else:
+        _masterintegritykey = os.urandom(64)
+        _cfgstore['globals']['master_integrity_key'] = _format_key(
+            _masterintegritykey,
+            passphrase=passphrase
+        )
+
+
+
+def _decrypt_value(cryptvalue,
+                   key=_masterkey,
+                   integritykey=_masterintegritykey):
+    iv, cipherdata, hmac = cryptvalue
+    if _masterkey is None or _masterintegritykey is None:
+        _init_masterkey()
+    check_hmac = HMAC.new(_masterintegritykey, cryptvalue, SHA256).digest()
+    if hmac != check_hmac:
+        raise Exception("bad HMAC value on crypted value")
+    decrypter = AES.new(_masterkey, AES.MODE_CBC, iv)
+    value = decrypter.decrypt(cryptvalue)
+    padsize = ord(value[-1])
+    pad = value[-padsize:]
+    # Note that I cannot grasp what could be done with a subliminal
+    # channel in padding in this case, but check the padding anyway
+    for padbyte in pad:
+        if ord(padbyte) != padsize:
+            raise Exception("bad padding in encrypted value")
+    return value[0:-padsize]
+
+
+def _crypt_value(value,
+                 key=_masterkey,
+                 integritykey=_masterintegritykey):
+    # encrypt given value
+    # PKCS7 is the padding scheme to employ, if no padding needed, pad with 16
+    # check HMAC prior to attempting decrypt
+    if key is None or integritykey is None:
+        _init_masterkey()
+        key=_masterkey
+        integritykey=_masterintegritykey
+    iv = os.urandom(16)
+    crypter = AES.new(key, ASE.MOD_CBC, iv)
+    neededpad = 16 - (len(value) % 16)
+    pad = chr(neededpad) * neededpad
+    value = value + pad
+    cryptval = crypter.encrypt(value)
+    hmac = HMAC.new(integritykey, cryptval, SHA256).digest()
+    return (iv, cryptval, hmac)
+
+
+class NodeAttribs(object):
+    def __init__(self, nodes=[], attributes=[], tenant=0):
+        self._nodelist = collecitons.dequeue(nodes)
+        self._tenant = tenant
+        self._attributes=attributes
+
+    def __iter__(self):
+        return self
+
+    def next():
+        node = self._nodelist.popleft()
+        onodeobj = _cfgstore['node'][(self._tenant,node)]
+        nodeobj = 
+        attriblist = []
+        #if there is a filter, delete irrelevant keys
+        if self._attributes.length > 0:
+            for attribute in nodeobj.keys():
+                if attribute not in self._attributes:
+                    del nodeobj[attribute]
+        #now that attributes are filtered, seek out and evaluate expressions
+        for attribute in nodeobj.keys():
+            if ('value' not in nodeobj[attribute] and
+                    'cryptvalue' in nodeobj[attribute]):
+                nodeobj[attribute]['value'] = _decrypt_value(
+                                            nodeobj[attribute]['cryptvalue'])
+            if ('value' not in nodeobj[attribute] and
+                    'expression' in nodeobj[attribute]):
+                nodeobj[attribute]['value'] = _expand_expression(
+                                                attribute=attribute,
+                                                nodeobj=nodeobj)
+
+
--- a/confluent/httpapi.py
+++ b/confluent/httpapi.py
@@ -9,19 +9,24 @@ import confluent.util as util
 import eventlet
 import os
 import string
+import urlparse
 scgi = eventlet.import_patched('flup.server.scgi')


 consolesessions = {}


-def _get_query_dict(qstring):
+def _get_query_dict(qstring, reqbody, reqtype):
    qdict = {}
    if not qstring:
        return qdict
    for qpair in qstring.split('&'):
        qkey, qvalue = qpair.split('=')
        qdict[qkey] = qvalue
+    if reqbody is not None:
+        if reqtype == "application/x-www-form-urlencoded":
+            
+
    return qdict


@@ -39,19 +44,15 @@ def _authorize_request(env):
    return 200


-def _format_response(response):
-
-
 def _pick_mimetype(env):
    """Detect the http indicated mime to send back.

    Note that as it gets into the ACCEPT header honoring, it only looks for
    application/json and else gives up and assumes html.  This is because
-    browsers are too terrible.  It is assumed that
+    browsers are very chaotic about ACCEPT header.  It is assumed that
    XMLHttpRequest.setRequestHeader will be used by clever javascript
    if the '.json' scheme doesn't cut it.
    """
-    # TODO(jbjohnso): will this scheme actually play nice with shellinabox?
    if env['PATH_INFO'].endswith('.json'):
        return 'application/json'
    elif env['PATH_INFO'].endswith('.html'):
@@ -74,12 +75,18 @@ def resourcehandler(env, start_response):
    """
    authorized = _authorize_request(env)
    mimetype = _pick_mimetype(env)
-    print repr(env)
+    reqbody = None
+    reqtype = None
+    if 'CONTENT_LENGTH' in env and env['CONTENT_LENGTH']:
+        reqbody = env['wsgi.input'].read(int(env['CONTENT_LENGTH']))
+        reqtype = env['CONTENT_TYPE']
+    print env
    if authorized in (401, 403):
        start_response(authorized, [])
        return
-    querydict = _get_query_dict(env['QUERY_STRING'])
+    querydict = _get_query_dict(env['QUERY_STRING'], reqbody, reqtype)
    if '/console/session' in env['PATH_INFO']:
+        #hard bake JSON into this path, do not support other incarnations
        prefix, _, _ = env['PATH_INFO'].partition('/console/session')
        _, _, nodename = prefix.rpartition('/')
        if 'session' not in querydict.keys() or not querydict['session']:
@@ -90,9 +97,9 @@ def resourcehandler(env, start_response):
                return
            sessid = _assign_consessionid(consession)
            start_response('200 OK', [('Content-Type', 'application/json; charset=utf-8')])
-            return [d+'","data":""}']
+            return ['{"session":"%s","data":""}' % sessid]
    start_response('404 Not Found', [])
-    return []
+    return ["Unrecognized directive (404)"]

 class HttpApi(object):
    def start(self):
--- a/confluent/log.py
+++ b/confluent/log.py
@@ -0,0 +1,21 @@
+# Copyright 2013 IBM
+# All rights reserved
+
+# This module contains functions to write out log type data.
+# In this go around, log data is explicitly kept distinct from config data.
+# Config data is almost always retrieved by a particular key value and its
+# access pattern is random.  For logs, the access tends to be sequential.
+#
+# Current thought is to have a plain-text file and a secondary binary index
+# file.  The index would track events and time intervals and the seek() value.
+# Markers would be put into the plain text, allowing utility to rebuild
+# index if something happens beyond the scope of this module's code.
+#
+# We can contemplate how to add value as an audit log.  The following
+# possibilities could be explored:
+#   - Forward Secure Sealing (like systemd).  Examine the algorithm and decide
+#     if it is sufficient (their implementation, for example, seems hard
+#     to protect against tampering, as at least a few moments into the past
+#     can always be manipulated).
+#   - TPM PCRs.  Understand better what PCRs may be used/extended, perhaps
+#     per indexed event.
--- a/confluent/main.py
+++ b/confluent/main.py
@@ -11,7 +11,7 @@
 # Things like heartbeating and discovery
 # It also will optionally snoop SLP DA requests

-import confluent.webapi as webapi
+import confluent.httpapi as httpapi
 import eventlet
 from eventlet.green import socket
 from eventlet import wsgi
@@ -32,12 +32,12 @@ def _load_plugins():
        plugins.add(plugin)
    for plugin in plugins:
        tmpmod = __import__(plugin)
-        
+

 def run():
    _load_plugins()
-    httpapi = webapi.HttpApi()
-    httpapi.start()
+    webservice = httpapi.HttpApi()
+    webservice.start()
    while (1):
        eventlet.sleep(100)

--- a/confluent/noderange.py
+++ b/confluent/noderange.py
@@ -0,0 +1,65 @@
+# this will implement noderange grammar
+
+# considered ast, but a number of things violate python grammar like [] in
+# the middle of strings and use of @ for anything is not in their syntax
+
+#construct custom grammar with pyparsing
+
+#>>> grammar = pyparsing.Word(pyparsing.alphanums+'/', pyparsing.alphanums+'[]-.*') | ',-' | ',' | '@'
+#>>> parser = pyparsing.nestedExpr('(',')',content=grammar)
+#>>> parser.parseString("(n1-n4,compute,(foo@bar),-bob,bob)").asList()
+#[['n1-n4', ',', 'compute', ',', ['foo', '@', 'bar'], ',-', 'bob', ',', 'bob']]
+import pyparsing
+import re
+
+class NodeRange(object):
+    """Iterate over a noderange
+
+    :param noderange: string representing a noderange to evaluate
+    :param verify: whether or not to perform lookups in the config
+    """
+    _grammar = \
+        pyparsing.Word(
+            pyparsing.alphanums + '=', pyparsing.alphanums + '[]-.*+') | \
+        ',-' | ',' | '@'
+    _parser = pyparsing.nestedExpr(content=_grammar)
+    def __init__(self, noderange, verify=True):
+        self.verify = verify
+        elements = self._parser.parseString("(" + noderange + ")").asList()
+        self._noderange = self._evaluate(elements)
+        print self._noderange
+
+    def _evaluate(self, parsetree):
+        current_op = 0 # enum, 0 union, 1 subtract, 2 intersect
+        current_range = set([])
+        if not isinstance(parsetree,list):  # down to a plain text thing
+            return self._expandstring(parsetree)
+        for elem in parsetree:
+            if elem == ',-':
+                current_op = 1
+            elif elem == ',':
+                current_op = 0
+            elif elem == '@':
+                current_op = 2
+            elif current_op == 0:
+                current_range |= self._evaluate(elem)
+            elif current_op == 1:
+                current_range -= self._evaluate(elem)
+            elif current_op == 2:
+                current_range &= self._evaluate(elem)
+        return current_range
+
+    def _expandstring(self, element):
+        if self.verify:
+            #this is where we would check for exactly this
+            raise Exception("TODO: link with actual config")
+        #this is where we would check for a literal groupname
+        #ok, now time to understand the various things
+        if '[' in element:  #[] style expansion
+            raise Exception("TODO: [] in expression")
+        elif '-' in element:  # *POSSIBLE* range, could just be part of name
+            raise Exception("TODO: ranged expression")
+        elif ':' in element:  # : range for less ambiguity
+            raise Exception("TODO: ranged expression")
+        if not self.verify:
+            return set([element])