Skip to content

Commit de966ae

Browse files
authored
Improve Serialization Logic (#82)
* Drop items that can't be serialized. As part of the Python 3 conversion, we added logic to try to convert bytestrings to unicode strings. This had a problem: not all bytestrings that are produced as part of the context passed into and out of the jailed code execution are actually unicode strings. In some cases this resulted in a unicode decode error. Before this code existed, we simply dropped any nonstandard types and kept only complex types (lists, dicts, tuples) that were easily jsonable. In Python 2 this was fine because byte strings were automatically coerced to unicode strings; in Python 3, however, this throws errors. So if we don't explicitly try to convert all the byte strings to unicode, we could stop passing back data that we previously used to pass in. The first iteration of this code assumed all byte arrays were unicode strings. Evidence from production shows that this is not always the case. So we fall back to the behaviour we had before this change: if an item can't be successfully converted to something we can serialize to JSON, we just drop that item from the content being passed back through the serialization straw. * Bump the version.
1 parent ed3d36c commit de966ae

File tree

3 files changed

+107
-30
lines changed

3 files changed

+107
-30
lines changed

codejail/safe_exec.py

Lines changed: 97 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ def safe_exec(code, globals_dict, files=None, python_path=None, slug=None,
9797
class DevNull(object):
9898
def write(self, *args, **kwargs):
9999
pass
100+
101+
def flush(self, *args, **kwargs):
102+
pass
100103
sys.stdout = DevNull()
101104
"""
102105
# Read the code and the globals from the stdin.
@@ -126,20 +129,6 @@ def write(self, *args, **kwargs):
126129
# so recursively convert them to strings prior to creating the final globals
127130
# dict
128131
"""
129-
def decode_object(obj):
130-
if isinstance(obj, bytes):
131-
return obj.decode('utf8')
132-
elif isinstance(obj, list):
133-
return [decode_object(i) for i in obj]
134-
elif isinstance(obj, dict):
135-
return {k: decode_object(v) for k, v in six.iteritems(obj)}
136-
elif isinstance(obj, tuple):
137-
return tuple(decode_object(i) for i in obj)
138-
else:
139-
return obj
140-
141-
decoded_dict = decode_object(g_dict)
142-
143132
def jsonable(v):
144133
if not isinstance(v, ok_types):
145134
return False
@@ -148,11 +137,49 @@ def jsonable(v):
148137
except Exception:
149138
return False
150139
return True
151-
g_dict = {
152-
k:v
153-
for k,v in six.iteritems(decoded_dict)
154-
if jsonable(v) and k not in bad_keys
155-
}
140+
141+
def filter_unserializable(obj):
142+
if isinstance(obj, bytes):
143+
return obj.decode('utf-8')
144+
elif isinstance(obj, list):
145+
new_list = []
146+
for i in obj:
147+
try:
148+
new_obj = filter_unserializable(i)
149+
if jsonable(new_obj):
150+
new_list.append(new_obj)
151+
except Exception as e:
152+
pass # Don't add the item if we can't decode it
153+
return new_list
154+
elif isinstance(obj, dict):
155+
new_dict = {}
156+
for k,v in six.iteritems(obj):
157+
try:
158+
new_key = filter_unserializable(k)
159+
new_value = filter_unserializable(v)
160+
if jsonable(new_value) and jsonable(new_key):
161+
new_dict[new_key] = new_value
162+
except Exception as e:
163+
pass # Don't add the item if we can't decode it
164+
return new_dict
165+
elif isinstance(obj, tuple):
166+
list_for_new_tuple = []
167+
for i in obj:
168+
try:
169+
new_obj = filter_unserializable(i)
170+
if jsonable(new_obj):
171+
list_for_new_tuple.append(new_obj)
172+
except Exception as e:
173+
pass # Don't add the item if we can't decode it
174+
return tuple(list_for_new_tuple)
175+
else:
176+
return obj
177+
178+
for key in bad_keys:
179+
if key in g_dict:
180+
del g_dict[key]
181+
182+
g_dict = filter_unserializable(g_dict)
156183
"""
157184
# Write the globals back to the calling process.
158185
"""
@@ -172,6 +199,12 @@ def jsonable(v):
172199
"python", code=jailed_code, stdin=stdin, files=files, slug=slug,
173200
extra_files=extra_files,
174201
)
202+
203+
if LOG_ALL_CODE:
204+
log.debug("Status: %s", res.status)
205+
log.debug("Stdout: %s", res.stdout)
206+
log.debug("Stderr: %s", res.stderr)
207+
175208
if res.status != 0:
176209
raise SafeExecException((
177210
"Couldn't execute jailed code: stdout: {res.stdout!r}, "
@@ -187,23 +220,60 @@ def json_safe(d):
187220
Used to emulate reading data through a serialization straw.
188221
189222
"""
190-
def decode_object(obj):
223+
224+
ok_types = (type(None), int, float, str, six.text_type, list, tuple, dict)
225+
226+
def jsonable(v):
227+
if not isinstance(v, ok_types):
228+
return False
229+
try:
230+
json.dumps(v)
231+
except Exception:
232+
return False
233+
return True
234+
235+
def filter_unserializable(obj):
191236
if isinstance(obj, bytes):
192-
return obj.decode('utf8')
237+
return obj.decode('utf-8')
193238
elif isinstance(obj, list):
194-
return [decode_object(i) for i in obj]
239+
new_list = []
240+
for i in obj:
241+
try:
242+
new_obj = filter_unserializable(i)
243+
if jsonable(new_obj):
244+
new_list.append(new_obj)
245+
except Exception:
246+
pass # Don't add the item if we can't decode it
247+
return new_list
195248
elif isinstance(obj, dict):
196-
return {k: decode_object(v) for k, v in six.iteritems(obj)}
249+
new_dict = {}
250+
for k,v in six.iteritems(obj):
251+
try:
252+
new_key = filter_unserializable(k)
253+
new_value = filter_unserializable(v)
254+
if jsonable(new_value) and jsonable(new_key):
255+
new_dict[new_key] = new_value
256+
except Exception:
257+
pass # Don't add the item if we can't decode it
258+
return new_dict
197259
elif isinstance(obj, tuple):
198-
return tuple(decode_object(i) for i in obj)
260+
list_for_new_tuple = []
261+
for i in obj:
262+
try:
263+
new_obj = filter_unserializable(i)
264+
if jsonable(new_obj):
265+
list_for_new_tuple.append(new_obj)
266+
except Exception:
267+
pass # Don't add the item if we can't decode it
268+
return tuple(list_for_new_tuple)
199269
else:
200270
return obj
201-
decoded_dict = decode_object(d)
202271

203-
ok_types = (type(None), int, float, str, six.text_type, list, tuple, dict)
272+
serializable_dict = filter_unserializable(d)
273+
204274
bad_keys = ("__builtins__",)
205275
jd = {}
206-
for k, v in six.iteritems(decoded_dict):
276+
for k, v in six.iteritems(serializable_dict):
207277
if not isinstance(v, ok_types):
208278
continue
209279
if k in bad_keys:

codejail/tests/test_safe_exec.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,12 @@ def test_set_values(self):
4545
def test_complex_globals(self):
4646
globs = {}
4747
self.safe_exec(
48-
"from builtins import bytes; test_dict = {1: bytes('a', 'utf8'), 2: 'b', 3: {1: bytes('b', 'utf8'), 2: (1, bytes('a', 'utf8'))}}",
48+
textwrap.dedent("""\
49+
from builtins import bytes
50+
test_dict = {1: bytes('a', 'utf8'), 2: 'b', 3: {1: bytes('b', 'utf8'), 2: (1, bytes('a', 'utf8'))}}
51+
foo = "bar"
52+
test_dict_type = type(test_dict)
53+
"""),
4954
globs
5055
)
5156
self.assertDictEqual(globs['test_dict'], {'1': 'a', '2': 'b', '3': {'1': 'b', '2': [1, 'a']}})
@@ -113,14 +118,16 @@ def test_extra_files(self):
113118
]
114119
self.safe_exec(textwrap.dedent("""\
115120
import six
116-
with open("extra.txt", 'rb') as f:
121+
import io
122+
with io.open("extra.txt", 'r') as f:
117123
extra = f.read()
118124
with open("also.dat", 'rb') as f:
119125
if six.PY2:
120126
also = f.read().encode("hex")
121127
else:
122128
also = f.read().hex()
123129
"""), globs, extra_files=extras)
130+
124131
self.assertEqual(globs['extra'], "I'm extra!\n")
125132
self.assertEqual(globs['also'], "01ff02fe")
126133

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
setup(
66
name="codejail",
7-
version="2.0",
7+
version="2.1",
88
packages=['codejail'],
99
classifiers=[
1010
"License :: OSI Approved :: Apache Software License",

0 commit comments

Comments
 (0)