From a31834910cbe569836994344fe62a66af70bb63d Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 20 Mar 2015 15:39:51 -0400 Subject: [PATCH] Fix incomplete handling of UTF-8 data If the JSON dumper sees UTF-8 encoded strings that it wants to join with other strings, the dump fails. Instead, make all of the data fully unicode to keep the JSON dumper happy, then force the result to be encoded as UTF-8 to keep Python's I/O happy. --- confluent_common/confluent/tlvdata.py | 20 ++++++++++++++++++-- confluent_server/confluent/httpapi.py | 5 ++++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/confluent_common/confluent/tlvdata.py b/confluent_common/confluent/tlvdata.py index c67453cd..4da3ae38 100644 --- a/confluent_common/confluent/tlvdata.py +++ b/confluent_common/confluent/tlvdata.py @@ -20,20 +20,36 @@ import json import struct +def unicode_dictvalues(dictdata): + for key in dictdata: + if isinstance(dictdata[key], str): + dictdata[key] = dictdata[key].decode('utf-8') + elif isinstance(dictdata[key], list): + for i in xrange(len(dictdata[key])): + if isinstance(dictdata[key][i], str): + dictdata[key][i] = dictdata[key][i].decode('utf-8') + elif isinstance(dictdata[key][i], dict): + unicode_dictvalues(dictdata[key][i]) + elif isinstance(dictdata[key], dict): + unicode_dictvalues(dictdata[key]) + + def send(handle, data): if isinstance(data, str): # plain text, e.g. 
console data tl = len(data) if tl < 16777216: - #type for string is '0', so we don't need - #to xor anything in + # type for string is '0', so we don't need + # to xor anything in handle.sendall(struct.pack("!I", tl)) else: raise Exception("String data length exceeds protocol") handle.sendall(data) elif isinstance(data, dict): # JSON currently only goes to 4 bytes # Some structured message, like what would be seen in http responses + unicode_dictvalues(data) # make everything unicode, assuming UTF-8 sdata = json.dumps(data, ensure_ascii=False, separators=(',', ':')) + sdata = sdata.encode('utf-8') tl = len(sdata) if tl > 16777215: raise Exception("JSON data exceeds protocol limits") diff --git a/confluent_server/confluent/httpapi.py b/confluent_server/confluent/httpapi.py index bc5e899c..7e103c27 100644 --- a/confluent_server/confluent/httpapi.py +++ b/confluent_server/confluent/httpapi.py @@ -25,6 +25,7 @@ import confluent.exceptions as exc import confluent.log as log import confluent.messages import confluent.core as pluginapi +import confluent.tlvdata as tlvdata import confluent.util as util import copy import eventlet @@ -500,7 +501,9 @@ def _assemble_json(responses, resource, url, extension): else: rspdata[dk] = rsp[dk] rspdata["_links"] = links - yield json.dumps(rspdata, sort_keys=True, indent=4, ensure_ascii=False) + tlvdata.unicode_dictvalues(rspdata) + yield json.dumps( + rspdata, sort_keys=True, indent=4, ensure_ascii=False).encode('utf-8') def serve():