spacepaste

  1.  
  2. import io
  3. import struct
  4. from functools import wraps
  5. _OT_NONE = 0
  6. _OT_TRUE = 1
  7. _OT_FALSE = 2
  8. _OT_INT = 3
  9. _OT_REAL = 4
  10. _OT_STR = 5
  11. _OT_BYTES = 6
  12. _OT_LIST = 7
  13. _OT_MAP = 8
  14. _OT_SET = 9
  15. _OT_NINT_BYTE = 0
  16. _OT_NINT_SHORT = 1
  17. _OT_NINT_INT = 2
  18. _OT_NINT_LONG = 3
  19. _OT_PINT_BYTE = 4
  20. _OT_PINT_SHORT = 5
  21. _OT_PINT_INT = 6
  22. _OT_PINT_LONG = 7
  23. _OT_INT_STR = 8
  24. _OT_NOT_LINK = 0
  25. _OT_LINK = 1
  26. class BinarySerializationDeserializationException(Exception):
  27. def __init__(self, *args, **kwargs):
  28. super(BinarySerializationDeserializationException, self).__init__(*args, **kwargs)
  29. class BinarySerializationException(BinarySerializationDeserializationException):
  30. def __init__(self, *args, **kwargs):
  31. super(BinarySerializationException, self).__init__(*args, **kwargs)
  32. class BinaryDeserializationException(BinarySerializationDeserializationException):
  33. def __init__(self, *args, **kwargs):
  34. super(BinaryDeserializationException, self).__init__(*args, **kwargs)
  35. def _boundary_check(bytes_count):
  36. def _boundary_check_decorator(fnc):
  37. @wraps(fnc)
  38. def _boundary_check_wrapper(bytedata, offset, maxlen, *args, **kwargs):
  39. if offset+bytes_count > maxlen:
  40. raise BinaryDeserializationException("Attempting to read past data, maxlen %s, read attempt for %s at %s" % (str(maxlen), str(bytes_count), str(offset)))
  41. return fnc(bytedata, offset, maxlen, *args, **kwargs)
  42. return _boundary_check_wrapper
  43. return _boundary_check_decorator
  44. def dump(py_primitive):
  45. writer = io.BytesIO()
  46. try:
  47. _dump(writer, py_primitive, _ValueCache())
  48. return writer.getvalue()
  49. finally:
  50. writer.close()
  51. def _dump(w, py_value, cache):
  52. if py_value is None:
  53. w.write(_get_type_as_byte(_OT_NONE))
  54. elif py_value is True:
  55. w.write(_get_type_as_byte(_OT_TRUE))
  56. elif py_value is False:
  57. w.write(_get_type_as_byte(_OT_FALSE))
  58. elif isinstance(py_value, int):
  59. _dump_int(w, py_value)
  60. elif isinstance(py_value, float):
  61. _dump_float(w, py_value)
  62. elif isinstance(py_value, str):
  63. _dump_str(w, py_value)
  64. elif isinstance(py_value, bytes):
  65. _dump_bytes(w, py_value)
  66. elif isinstance(py_value, dict):
  67. _dump_dict(w, py_value, cache)
  68. elif isinstance(py_value, list):
  69. _dump_list(w, py_value, cache)
  70. elif isinstance(py_value, set):
  71. _dump_set(w, py_value, cache)
  72. else:
  73. raise BinarySerializationException("Not supported type %s: %s" % (type(py_value), str(py_value)))
  74. def _dump_int(w, i):
  75. w.write(_get_type_as_byte(_OT_INT))
  76. is_negative = i < 0
  77. if is_negative:
  78. i = abs(i)
  79. bit_len = i.bit_length()
  80. if bit_len <= 8:
  81. if is_negative:
  82. w.write(_get_type_as_byte(_OT_NINT_BYTE))
  83. else:
  84. w.write(_get_type_as_byte(_OT_PINT_BYTE))
  85. w.write(struct.pack("!B", i))
  86. elif bit_len <= 16:
  87. if is_negative:
  88. w.write(_get_type_as_byte(_OT_NINT_SHORT))
  89. else:
  90. w.write(_get_type_as_byte(_OT_PINT_SHORT))
  91. w.write(struct.pack("!H", i))
  92. elif bit_len <= 32:
  93. if is_negative:
  94. w.write(_get_type_as_byte(_OT_NINT_INT))
  95. else:
  96. w.write(_get_type_as_byte(_OT_PINT_INT))
  97. w.write(struct.pack("!I", i))
  98. elif bit_len <= 64:
  99. if is_negative:
  100. w.write(_get_type_as_byte(_OT_NINT_LONG))
  101. else:
  102. w.write(_get_type_as_byte(_OT_PINT_LONG))
  103. w.write(struct.pack("!Q", i))
  104. else:
  105. w.write(_get_type_as_byte(_OT_INT_STR))
  106. _dump_str(w, str(i) if not is_negative else ("-" + str(i)))
  107. def _dump_float(w, f):
  108. w.write(_get_type_as_byte(_OT_REAL))
  109. w.write(struct.pack("!d", f))
  110. def _dump_str(w, string):
  111. w.write(_get_type_as_byte(_OT_STR))
  112. _dump_bytes(w, string.encode("utf-8"))
  113. def _dump_bytes(w, bytestring):
  114. w.write(_get_type_as_byte(_OT_BYTES))
  115. w.write(struct.pack("!I", len(bytestring)))
  116. w.write(bytestring)
  117. def _dump_dict(w, d, cache):
  118. w.write(_get_type_as_byte(_OT_MAP))
  119. cache_id, already_cached = cache.append_to_cache(d)
  120. if not already_cached:
  121. w.write(_get_type_as_byte(_OT_NOT_LINK))
  122. else:
  123. w.write(_get_type_as_byte(_OT_LINK))
  124. w.write(struct.pack("!I", cache_id))
  125. return
  126. keys = d.keys()
  127. w.write(struct.pack("!I", len(keys)))
  128. for key in keys:
  129. _dump(w, key, cache)
  130. _dump(w, d[key], cache)
  131. def _dump_list(w, l, cache):
  132. w.write(_get_type_as_byte(_OT_LIST))
  133. cache_id, already_cached = cache.append_to_cache(l)
  134. if not already_cached:
  135. w.write(_get_type_as_byte(_OT_NOT_LINK))
  136. else:
  137. w.write(_get_type_as_byte(_OT_LINK))
  138. w.write(struct.pack("!I", cache_id))
  139. return
  140. w.write(struct.pack("!I", len(l)))
  141. for py_value in l:
  142. _dump(w, py_value, cache)
  143. def _dump_set(w, s, cache):
  144. w.write(_get_type_as_byte(_OT_SET))
  145. cache_id, already_cached = cache.append_to_cache(s)
  146. if not already_cached:
  147. w.write(_get_type_as_byte(_OT_NOT_LINK))
  148. else:
  149. w.write(_get_type_as_byte(_OT_LINK))
  150. w.write(struct.pack("!I", cache_id))
  151. return
  152. w.write(struct.pack("!I", len(s)))
  153. for py_value in s:
  154. _dump(w, py_value, cache)
  155. def _get_type_as_byte(t):
  156. return bytes((t,))
  157. def load(bytedata):
  158. if len(bytedata) == 0:
  159. raise BinaryDeserializationException("empty datasource")
  160. return _load(bytedata, 0, len(bytedata), _ValueCache())[0]
  161. @_boundary_check(1)
  162. def _load(bytedata, offset, maxlen, cache):
  163. py_type = bytedata[offset]
  164. offset += 1
  165. if py_type == _OT_NONE:
  166. return None, offset
  167. elif py_type == _OT_TRUE:
  168. return True, offset
  169. elif py_type == _OT_FALSE:
  170. return False, offset
  171. elif py_type == _OT_INT:
  172. return _load_int(bytedata, offset, maxlen)
  173. elif py_type == _OT_REAL:
  174. return _load_float(bytedata, offset, maxlen)
  175. elif py_type == _OT_STR:
  176. return _load_str(bytedata, offset, maxlen)
  177. elif py_type == _OT_BYTES:
  178. return _load_bytes(bytedata, offset, maxlen)
  179. elif py_type == _OT_MAP:
  180. return _load_dict(bytedata, offset, maxlen, cache)
  181. elif py_type == _OT_LIST:
  182. return _load_list(bytedata, offset, maxlen, cache)
  183. elif py_type == _OT_SET:
  184. return _load_set(bytedata, offset, maxlen, cache)
  185. else:
  186. raise BinaryDeserializationException("wrong type specifier %s" %str(py_type))
  187. @_boundary_check(1)
  188. def _load_int(bytedata, offset, maxlen):
  189. subtype = bytedata[offset]
  190. offset += 1
  191. if subtype == _OT_NINT_BYTE or subtype == _OT_PINT_BYTE:
  192. return _load_int_byte(bytedata, offset, maxlen, subtype == _OT_NINT_BYTE)
  193. elif subtype == _OT_NINT_SHORT or subtype == _OT_PINT_SHORT:
  194. return _load_int_short(bytedata, offset, maxlen, subtype == _OT_NINT_SHORT)
  195. elif subtype == _OT_NINT_INT or subtype == _OT_PINT_INT:
  196. return _load_int_int(bytedata, offset, maxlen, subtype == _OT_NINT_INT)
  197. elif subtype == _OT_NINT_LONG or subtype == _OT_PINT_LONG:
  198. return _load_int_long(bytedata, offset, maxlen, subtype == _OT_NINT_LONG)
  199. elif subtype == _OT_INT_STR:
  200. return _load(bytedata, offset, maxlen)
  201. else:
  202. raise BinaryDeserializationException("incorrect byte for int type %s" % (str(subtype)))
  203. @_boundary_check(1)
  204. def _load_int_byte(bytedata, offset, _maxlen, is_negative):
  205. return _load_int_value(bytedata, offset, "!B", 1, is_negative)
  206. @_boundary_check(2)
  207. def _load_int_short(bytedata, offset, _maxlen, is_negative):
  208. return _load_int_value(bytedata, offset, "!H", 2, is_negative)
  209. @_boundary_check(4)
  210. def _load_int_int(bytedata, offset, _maxlen, is_negative):
  211. return _load_int_value(bytedata, offset, "!I", 4, is_negative)
  212. @_boundary_check(8)
  213. def _load_int_long(bytedata, offset, _maxlen, is_negative):
  214. return _load_int_value(bytedata, offset, "!Q", 8, is_negative)
  215. def _load_int_value(bytedata, offset, decode_frm, move_offset, is_negative):
  216. value = struct.unpack_from(decode_frm, bytedata, offset)[0]
  217. offset += move_offset
  218. if is_negative:
  219. value = -value
  220. return value, offset
  221. @_boundary_check(8)
  222. def _load_float(bytedata, offset, _maxlen):
  223. return struct.unpack_from("!d", bytedata, offset)[0], offset+8
  224. @_boundary_check(1)
  225. def _load_str(bytedata, offset, maxlen):
  226. subtype = bytedata[offset]
  227. offset += 1
  228. if subtype != _OT_BYTES:
  229. raise BinaryDeserializationException("incorrect byte for bytes type %s" %(str(subtype)))
  230. b, offset = _load_bytes(bytedata, offset, maxlen)
  231. return str(b, "utf-8"), offset
  232. @_boundary_check(4)
  233. def _load_bytes(bytedata, offset, maxlen):
  234. length = struct.unpack_from("!I", bytedata, offset)[0]
  235. offset += 4
  236. @_boundary_check(length)
  237. def dynamic_get(bytedata, offset, _maxlen):
  238. v = bytearray(length)
  239. for ix in range(length):
  240. v[ix] = bytedata[ix+offset]
  241. return bytes(v), offset+length
  242. return dynamic_get(bytedata, offset, maxlen)
  243. @_boundary_check(1)
  244. def _load_cached(bytedata, offset, maxlen, cache, loader):
  245. cached = bytedata[offset]
  246. offset += 1
  247. if cached == _OT_LINK:
  248. return _get_cached(bytedata, offset, maxlen, cache)
  249. else:
  250. return loader(bytedata, offset, maxlen, cache)
  251. @_boundary_check(4)
  252. def _get_cached(bytedata, offset, _maxlen, cache):
  253. return cache.get_cached(struct.unpack_from("!I", bytedata, offset)[0]), offset + 4
  254. def _load_dict(bytedata, offset, maxlen, cache):
  255. return _load_cached(bytedata, offset, maxlen, cache, _load_dict_actual)
  256. @_boundary_check(4)
  257. def _load_dict_actual(bytedata, offset, maxlen, cache):
  258. v = dict()
  259. cache.append_to_cache(v)
  260. length = struct.unpack_from("!I", bytedata, offset)[0]
  261. offset += 4
  262. for _ix in range(length):
  263. key, offset = _load(bytedata, offset, maxlen, cache)
  264. value, offset = _load(bytedata, offset, maxlen, cache)
  265. v[key] = value
  266. return v, offset
  267. def _load_list(bytedata, offset, maxlen, cache):
  268. return _load_cached(bytedata, offset, maxlen, cache, _load_list_actual)
  269. @_boundary_check(4)
  270. def _load_list_actual(bytedata, offset, maxlen, cache):
  271. v = list()
  272. cache.append_to_cache(v)
  273. length = struct.unpack_from("!I", bytedata, offset)[0]
  274. offset += 4
  275. for _ix in range(length):
  276. value, offset = _load(bytedata, offset, maxlen, cache)
  277. v.append(value)
  278. return v, offset
  279. def _load_set(bytedata, offset, maxlen, cache):
  280. return _load_cached(bytedata, offset, maxlen, cache, _load_set_actual)
  281. @_boundary_check(4)
  282. def _load_set_actual(bytedata, offset, maxlen, cache):
  283. v = set()
  284. cache.append_to_cache(v)
  285. length = struct.unpack_from("!I", bytedata, offset)[0]
  286. offset += 4
  287. for _ix in range(length):
  288. value, offset = _load(bytedata, offset, maxlen, cache)
  289. v.add(value)
  290. return v, offset
  291. class _ValueCache(object):
  292. def __init__(self):
  293. self._objectmap = {}
  294. self._valmap = {}
  295. self._nextid = 0
  296. def append_to_cache(self, obj):
  297. if id(obj) not in self._objectmap:
  298. new_id = self._nextid
  299. self._nextid += 1
  300. self._objectmap[id(obj)] = new_id
  301. self._valmap[new_id] = obj
  302. return new_id, False
  303. return self._objectmap[id(obj)], True
  304. def get_cached(self, oid):
  305. if oid in self._valmap:
  306. return self._valmap[oid]
  307. raise BinaryDeserializationException("object not in cache!")
  308.