diff --git a/confluent/config.py b/confluent/config.py index b7f09e0f..1dfc2666 100644 --- a/confluent/config.py +++ b/confluent/config.py @@ -1,10 +1,14 @@ # Copyright 2013 IBM # All rights reserved -# This would be similar to Table.pm functionality -# Two backends -# simple plain JSON -# redis + +# Note on the cryptography. Default behavior is mostly just to pave the +# way to meaningful security. Root all potentially sensitive data in +# one key. That key is in plain sight, so not meaningfully protected. +# However, the key can be protected in the following ways: +# - Passphrase protected (requiring human interaction every restart) +# - TPM sealing (which would forgo the interactive step, assuming the risk of +# physical attack on TPM is not a concern) # This time around, expression based values will be parsed when set, and the # parsing results will be stored rather than parsing on every evaluation @@ -12,8 +16,240 @@ # as well as the $1, $2, etc extracted from nodename. Left hand side can # be requested to customize $1 and $2, but it is not required +# Actually, may override one of the python string formatters: +# 2.6 string.Formatter, e.g. "hello {world}" +# 2.4 string.Template, e.g. "hello $world" + # In JSON mode, will just read and write entire thing, with a comment # to dissuade people from hand editing.
# In JSON mode, a file for different categories (site, nodes, etc) # in redis, each category is a different database number + +import array +import collections +import math +import os + + +from Crypto.Cipher import AES +from Crypto.Hash import HMAC +from Crypto.Hash import SHA25sterkey = None +_masterintegritykey = None +_cfgstore = {} + +def _expand_expression(attribute, nodeobj): + # here is where we may avail ourselves of string.Formatter or + # string.Template + # we would then take the string that is identifier and do + # a little ast magic + # {(n1+1)/12+1} would take first number from nodename + # {enclosure.id * 8} would take enclosure.id value + # ast scheme would envolve the operator module and ast + # modules, with a mapping from ast operator classes to + # valid operator functions + # ast.parse gives a body array, and value is where we kick off + # ast.Num has an 'n' member to give the number + # ast.Attribute o +#>>> import ast +#>>> b=ast.parse("enclosure.id+n0+1/2") +#>>> b.body[0].value +#<_ast.BinOp object at 0x7ff449ff0090> +#>>> b.body[0].value.op +#<_ast.Add object at 0x7ff4500faf90> +#>>> b.body[0].value.left +#<_ast.BinOp object at 0x7ff449ff00d0> +#>>> b.body[0].value.left.op +#<_ast.Add object at 0x7ff4500faf90> +#>>> b.body[0].value.left.left +#<_ast.Attribute object at 0x7ff449ff0110> +#>>> b.body[0].value.left.left.value.id +#'enclosure' +#>>> b.body[0].value.left.right +#<_ast.Name object at 0x7ff449ff0190> +#>>> b.body[0].value.left.right.id +#'n0' +#>>> b.body[0].value.left.left.id +#Traceback (most recent call last): +# File "", line 1, in +#AttributeError: 'Attribute' object has no attribute 'id' +#>>> b.body[0].value.left.left.attr +#'id' +#import ast +#import operator as op +# supported operators +#operators = {ast.Add: op.add, ast.Sub: op.sub, ast.Mult: op.mul, +# ast.Div: op.truediv, ast.Pow: op.pow, ast.BitXor: op.xor} +#ef eval_expr(expr): +# """ +# >>> eval_expr('2^6') +# 4 +# >>> eval_expr('2**6') +# 64 +# >>> eval_expr('1 + 
2*3**(4^5) / (6 + -7)') +# -5.0 +# """ +# return eval_(ast.parse(expr).body[0].value) # Module(body=[Expr(value=...)]) +#ef eval_(node): +# if isinstance(node, ast.Num): # +# return node.n +# elif isinstance(node, ast.operator): # +# return operators[type(node)] +# elif isinstance(node, ast.BinOp): # +# return eval_(node.op)(eval_(node.left), eval_(node.right)) +# else: +# raise TypeError(node) + pass + + +def unlock_config_keys(passphrase=None): + _init_masterkey(passphrase) + + +def _pbkdf2(passphrase, salt, iterations, size): + blocks = int(math.ceil(size/32.0)) # Hardcoded to SHA256 behavior + retkey = "" + for block in xrange(blocks): + citerations = iterations + tsalt = salt + chr(block) + currval = HMAC.new(passphrase, tsalt, SHA256).digest() + currarray = array.array('L',currval) + while citerations > 1: + currval = HMAC.new(passphrase, currval).digest() + nextarray = array.array('L',currval) + for index in range(nextarray): + currarray[index] = currarray[index] ^ nextarray[index] + currval = currarray.tostring() + currarray = nextarray + citerations = citerations - 1 + retkey += currval + return retkey[:size] + + +def _derive_keys(passphrase, salt): + tmpkey = _pbkdf2(passphrase, salt, 50000, 32) + finalkey = _pbkdf2(tmpkey, salt, 50000, 96) + return (finalkey[:32],finalkey[32:]) + + +def _get_protected_key(keydict, passphrase): + if keydict['unencryptedvalue']: + return keydict['unencryptedvalue'] + # TODO(jbjohnso): check for TPM sealing + if 'passphraseprotected' in keydict: + if passphrase is None: + raise Exception("Passphrase protected secret requires passhrase") + for pp in keydict['passphraseprotected']: + salt = pp[0] + privkey, integkey = _derive_keys(passphrase, salt) + return _decrypt_value(pp[1:], key=privkey, integritykey=integkey) + else: + raise Exception("No available decryption key") + + +def _format_key(key, passphrase=None): + if passphrase is not None: + salt = os.urandom(32) + privkey, integkey = _derive_keys(passphrase, salt) + cval 
= _crypt_value(key, key=privkey, integritykey=integkey) + return {"passphraseprotected": cval} + else: + return {"unencryptedvalue": key} + + +def _init_masterkey(passphrase=None): + if 'master_privacy_key' in _cfgstore['globals']: + _masterkey = _get_protected_key( + _cfgstore['globals']['master_privacy_key'], + passphrase=passphrase) + else: + _masterkey = os.urandom(32) + _cfgstore['globals']['master_privacy_key'] = _format_key(_masterkey, + passphrase=passphrase) + if 'master_integrity_key' in _cfgstore['globals']: + _masterintegritykey = _get_protected_key( + _cfgstore['globals']['master_integrity_key'], + passphrase=passphrase + ) + else: + _masterintegritykey = os.urandom(64) + _cfgstore['globals']['master_integrity_key'] = _format_key( + _masterintegritykey, + passphrase=passphrase + ) + + + +def _decrypt_value(cryptvalue, + key=_masterkey, + integritykey=_masterintegritykey): + iv, cipherdata, hmac = cryptvalue + if _masterkey is None or _masterintegritykey is None: + _init_masterkey() + check_hmac = HMAC.new(_masterintegritykey, cryptvalue, SHA256).digest() + if hmac != check_hmac: + raise Exception("bad HMAC value on crypted value") + decrypter = AES.new(_masterkey, AES.MODE_CBC, iv) + value = decrypter.decrypt(cryptvalue) + padsize = ord(value[-1]) + pad = value[-padsize:] + # Note that I cannot grasp what could be done with a subliminal + # channel in padding in this case, but check the padding anyway + for padbyte in pad: + if ord(padbyte) != padsize: + raise Exception("bad padding in encrypted value") + return value[0:-padsize] + + +def _crypt_value(value, + key=_masterkey, + integritykey=_masterintegritykey): + # encrypt given value + # PKCS7 is the padding scheme to employ, if no padded needed, pad with 16 + # check HMAC prior to attempting decrypt + if key is None or integritykey is None: + _init_masterkey() + key=_masterkey + integritykey=_masterintegritykey + iv = os.urandom(16) + crypter = AES.new(key, ASE.MOD_CBC, iv) + neededpad = 16 - 
(len(value) % 16) + pad = chr(neededpad) * neededpad + value = value + pad + cryptval = crypter.encrypt(value) + hmac = HMAC.new(integritykey, cryptval, SHA256).digest() + return (iv, cryptval, hmac) + + +class NodeAttribs(object): + def __init__(self, nodes=[], attributes=[], tenant=0): + self._nodelist = collecitons.dequeue(nodes) + self._tenant = tenant + self._attributes=attributes + + def __iter__(self): + return self + + def next(): + node = self._nodelist.popleft() + onodeobj = _cfgstore['node'][(self._tenant,node)] + nodeobj = + attriblist = [] + #if there is a filter, delete irrelevant keys + if self._attributes.length > 0: + for attribute in nodeobj.keys(): + if attribute not in self._attributes: + del nodeobj[attribute] + #now that attributes are filtered, seek out and evaluate expressions + for attribute in nodeobj.keys(): + if ('value' not in nodeobj[attribute] and + 'cryptvalue' in nodeobj[attribute]): + nodeobj[attribute]['value'] = _decrypt_value( + nodeobj[attribute]['cryptvalue']) + if ('value' not in nodeobj[attribute] and + 'expression' in nodeobj[attribute]): + nodeobj[attribute]['value'] = _expand_expression( + attribute=attribute, + nodeobj=nodeobj) + + diff --git a/confluent/webapi.py b/confluent/httpapi.py similarity index 82% rename from confluent/webapi.py rename to confluent/httpapi.py index a971b685..edc6986f 100644 --- a/confluent/webapi.py +++ b/confluent/httpapi.py @@ -9,19 +9,24 @@ import confluent.util as util import eventlet import os import string +import urlparse scgi = eventlet.import_patched('flup.server.scgi') consolesessions = {} -def _get_query_dict(qstring): +def _get_query_dict(qstring, reqbody, reqtype): qdict = {} if not qstring: return qdict for qpair in qstring.split('&'): qkey, qvalue = qpair.split('=') qdict[qkey] = qvalue + if reqbody is not None: + if reqtype == "application/x-www-form-urlencoded": + + return qdict @@ -39,19 +44,15 @@ def _authorize_request(env): return 200 -def _format_response(response): - - def 
_pick_mimetype(env): """Detect the http indicated mime to send back. Note that as it gets into the ACCEPT header honoring, it only looks for application/json and else gives up and assumes html. This is because - browsers are too terrible. It is assumed that + browsers are very chaotic about ACCEPT header. It is assumed that XMLHttpRequest.setRequestHeader will be used by clever javascript if the '.json' scheme doesn't cut it. """ - # TODO(jbjohnso): will this scheme actually play nice with shellinabox? if env['PATH_INFO'].endswith('.json'): return 'application/json' elif env['PATH_INFO'].endswith('.html'): @@ -74,12 +75,18 @@ def resourcehandler(env, start_response): """ authorized = _authorize_request(env) mimetype = _pick_mimetype(env) - print repr(env) + reqbody = None + reqtype = None + if 'CONTENT_LENGTH' in env and env['CONTENT_LENGTH']: + reqbody = env['wsgi.input'].read(int(env['CONTENT_LENGTH'])) + reqtype = env['CONTENT_TYPE'] + print env if authorized in (401, 403): start_response(authorized, []) return - querydict = _get_query_dict(env['QUERY_STRING']) + querydict = _get_query_dict(env['QUERY_STRING'], reqbody, reqtype) if '/console/session' in env['PATH_INFO']: + #hard bake JSON into this path, do not support other incarnations prefix, _, _ = env['PATH_INFO'].partition('/console/session') _, _, nodename = prefix.rpartition('/') if 'session' not in querydict.keys() or not querydict['session']: @@ -90,9 +97,9 @@ def resourcehandler(env, start_response): return sessid = _assign_consessionid(consession) start_response('200 OK', [('Content-Type', 'application/json; charset=utf-8')]) - return [d+'","data":""}'] + return ['{"session":"%s","data":""}' % sessid] start_response('404 Not Found', []) - return [] + return ["Unrecognized directive (404)"] class HttpApi(object): def start(self): diff --git a/confluent/log.py b/confluent/log.py new file mode 100644 index 00000000..a9e378c9 --- /dev/null +++ b/confluent/log.py @@ -0,0 +1,21 @@ +# Copyright 2013 IBM +# 
All rights reserved + +# This module contains functions to write out log type data. +# In this go around, log data is explicitly kept distinct from config data. +# Config data is almost always retrieved by a particular key value and its access +# pattern is random. For logs, the access tends to be sequential. +# +# Current thought is to have a plain-text file and a secondary binary index +# file. The index would track events and time intervals and the seek() value. +# Markers would be put into the plain text, allowing a utility to rebuild +# the index if something happens beyond the scope of this module's code. +# +# We can contemplate how to add value as an audit log. The following +# possibilities could be explored: +# - Forward Secure Sealing (like systemd). Examine the algorithm and decide +# if it is sufficient (their implementation, for example, seems hard +# to protect against tampering as at least a few moments into the past +# can always be manipulated). +# - TPM PCRs. Understand better what PCRs may be used/extended perhaps +# per-indexed event.
diff --git a/confluent/main.py b/confluent/main.py index 8dbf183e..6528f5a8 100644 --- a/confluent/main.py +++ b/confluent/main.py @@ -11,7 +11,7 @@ # Things like heartbeating and discovery # It also will optionally snoop SLP DA requests -import confluent.webapi as webapi +import confluent.httpapi as httpapi import eventlet from eventlet.green import socket from eventlet import wsgi @@ -32,12 +32,12 @@ def _load_plugins(): plugins.add(plugin) for plugin in plugins: tmpmod = __import__(plugin) - + def run(): _load_plugins() - httpapi = webapi.HttpApi() - httpapi.start() + webservice = httpapi.HttpApi() + webservice.start() while (1): eventlet.sleep(100) diff --git a/confluent/noderange.py b/confluent/noderange.py new file mode 100644 index 00000000..7dbdc578 --- /dev/null +++ b/confluent/noderange.py @@ -0,0 +1,65 @@ +# this will implement noderange grammar + +# considered ast, but a number of things violate python grammar like [] in +# the middle of strings and use of @ for anything is not in their syntax + +#construct custom grammer with pyparsing + +#>>> grammar = pyparsing.Word(pyparsing.alphanums+'/', pyparsing.alphanums+'[]-.*') | ',-' | ',' | '@' +#>>> parser = pyparsing.nestedExpr('(',')',content=grammar) +#>>> parser.parseString("(n1-n4,compute,(foo@bar),-bob,bob)").asList() +#[['n1-n4', ',', 'compute', ',', ['foo', '@', 'bar'], ',-', 'bob', ',', 'bob']] +import pyparsing +import re + +class NodeRange(object): + """Iterate over a noderange + + :param noderange: string representing a noderange to evaluate + :param verify: whether or not to perform lookups in the config + """ + _grammar = \ + pyparsing.Word( + pyparsing.alphanums + '=', pyparsing.alphanums + '[]-.*+') | \ + ',-' | ',' | '@' + _parser = pyparsing.nestedExpr(content=_grammar) + def __init__(self, noderange, verify=True): + self.verify = verify + elements = self._parser.parseString("(" + noderange + ")").asList() + self._noderange = self._evaluate(elements) + print self._noderange + + def 
_evaluate(self, parsetree): + current_op = 0 # enum, 0 union, 1 subtract, 2 intersect + current_range = set([]) + if not isinstance(parsetree,list): # down to a plain text thing + return self._expandstring(parsetree) + for elem in parsetree: + if elem == ',-': + current_op = 1 + elif elem == ',': + current_op = 0 + elif elem == '@': + current_op = 2 + elif current_op == 0: + current_range |= self._evaluate(elem) + elif current_op == 1: + current_range -= self._evaluate(elem) + elif current_op == 2: + current_range &= self._evaluate(elem) + return current_range + + def _expandstring(self, element): + if self.verify: + #this is where we would check for exactly this + raise Exception("TODO: link with actual config") + #this is where we would check for a literal groupname + #ok, now time to understand the various things + if '[' in element: #[] style expansion + raise Exception("TODO: [] in expression") + elif '-' in element: # *POSSIBLE* range, could just be part of name + raise Exception("TODO: ranged expression") + elif ':' in element: # : range for less ambiguity + raise Exception("TODO: ranged expression") + if not self.verify: + return set([element])