spacepaste

  1.  
  2. import array
  3. import collections
  4. import itertools
  5. # Placeholder constants
  6. FREE = -1
  7. DUMMY = -2
  8. class Dict(collections.MutableMapping):
  9. 'Space efficient dictionary with fast iteration and cheap resizes.'
  10. def _lookup(self, key, hashvalue):
  11. 'Same lookup logic as currently used in real dicts'
  12. assert self.filled < len(self.indices) # At least one open slot
  13. freeslot = None
  14. PERTURB_SHIFT = 5
  15. if hashvalue < 0:
  16. perturb = -hashvalue
  17. else:
  18. perturb = hashvalue
  19. n = len(self.indices)
  20. i = perturb & (n-1)
  21. while True:
  22. index = self.indices[i]
  23. if index == FREE:
  24. return (FREE, i) if freeslot is None else (DUMMY, freeslot)
  25. elif index == DUMMY:
  26. freeslot = i
  27. elif (self.keylist[index] is key or
  28. self.hashlist[index] == hashvalue
  29. and self.keylist[index] == key):
  30. return (index, i)
  31. i = 5 * i + perturb + 1
  32. i = i & (n-1)
  33. perturb >>= PERTURB_SHIFT
  34. @staticmethod
  35. def _make_index(n):
  36. 'New sequence of indices using the smallest possible datatype'
  37. if n <= 2**7: return array.array('b', [FREE]) * n # signed char
  38. if n <= 2**15: return array.array('h', [FREE]) * n # signed short
  39. if n <= 2**31: return array.array('l', [FREE]) * n # signed long
  40. return [FREE] * n # python integers
  41. def _resize(self, n):
  42. '''Reindex the existing hash/key/value entries.
  43. Entries do not get moved, they only get new indices.
  44. No calls are made to hash() or __eq__().
  45. '''
  46. n = 2 ** n.bit_length() # round-up to power-of-two
  47. self.indices = self._make_index(n)
  48. PERTURB_SHIFT = 5
  49. for index, hashvalue in enumerate(self.hashlist):
  50. if hashvalue < 0:
  51. perturb = -hashvalue
  52. else:
  53. perturb = hashvalue
  54. i = hashvalue & (n-1)
  55. while True:
  56. if self.indices[i] == FREE:
  57. break
  58. i = 5 * i + perturb + 1
  59. i = i & (n-1)
  60. perturb >>= PERTURB_SHIFT
  61. self.indices[i] = index
  62. self.filled = self.used
  63. def clear(self):
  64. self.indices = self._make_index(8)
  65. self.hashlist = []
  66. self.keylist = []
  67. self.valuelist = []
  68. self.used = 0
  69. self.filled = 0 # used + dummies
  70. def __init__(self, *args, **kwds):
  71. try:
  72. self.keylist
  73. except AttributeError:
  74. self.clear()
  75. self.update(*args, **kwds)
  76. def __getitem__(self, key):
  77. hashvalue = hash(key)
  78. index, i = self._lookup(key, hashvalue)
  79. if index < 0:
  80. raise KeyError(key)
  81. return self.valuelist[index]
  82. def __setitem__(self, key, value):
  83. hashvalue = hash(key)
  84. index, i = self._lookup(key, hashvalue)
  85. if index < 0:
  86. self.indices[i] = self.used
  87. self.hashlist.append(hashvalue)
  88. self.keylist.append(key)
  89. self.valuelist.append(value)
  90. self.used += 1
  91. if index == FREE:
  92. self.filled += 1
  93. if self.filled * 3 > len(self.indices) * 2:
  94. self._resize(4 * len(self))
  95. else:
  96. self.valuelist[index] = value
  97. def __delitem__(self, key):
  98. hashvalue = hash(key)
  99. index, i = self._lookup(key, hashvalue)
  100. if index < 0:
  101. raise KeyError(key)
  102. self.indices[i] = DUMMY
  103. self.used -= 1
  104. # If needed, swap with the lastmost entry to avoid leaving a "hole"
  105. if index != self.used:
  106. lasthash = self.hashlist[-1]
  107. lastkey = self.keylist[-1]
  108. lastvalue = self.valuelist[-1]
  109. lastindex, j = self._lookup(lastkey, lasthash)
  110. assert lastindex >= 0 and i != j
  111. self.indices[j] = index
  112. self.hashlist[index] = lasthash
  113. self.keylist[index] = lastkey
  114. self.valuelist[index] = lastvalue
  115. # Remove the lastmost entry
  116. self.hashlist.pop()
  117. self.keylist.pop()
  118. self.valuelist.pop()
  119. def __len__(self):
  120. return self.used
  121. def __iter__(self):
  122. return iter(self.keylist)
  123. def iterkeys(self):
  124. return iter(self.keylist)
  125. def keys(self):
  126. return list(self.keylist)
  127. def itervalues(self):
  128. return iter(self.valuelist)
  129. def values(self):
  130. return list(self.valuelist)
  131. def iteritems(self):
  132. return itertools.izip(self.keylist, self.valuelist)
  133. def items(self):
  134. return zip(self.keylist, self.valuelist)
  135. def __contains__(self, key):
  136. index, i = self._lookup(key, hash(key))
  137. return index >= 0
  138. def get(self, key, default=None):
  139. 'D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.'
  140. index, i = self._lookup(key, hash(key))
  141. return self.valuelist[index] if index >= 0 else default
  142. def popitem(self):
  143. ''' D.popitem() -> (k, v), remove and return some (key, value) pair as a
  144. 2-tuple; but raise KeyError if D is empty.
  145. '''
  146. try:
  147. key = self.keylist[-1]
  148. value = self.valuelist[-1]
  149. except IndexError:
  150. raise KeyError( 'popitem(): dictionary is empty')
  151. del self[key]
  152. return key, value
  153. def __repr__(self):
  154. return 'Dict(%r)' % self.items()
  155. def show_structure(self):
  156. 'Diagnostic method. Not part of the API.'
  157. print '=' * 50
  158. print self
  159. print 'Indices:', self.indices
  160. for i, row in enumerate(zip(self.hashlist, self.keylist, self.valuelist)):
  161. print i, row
  162. print '-' * 50
  163. if __name__ == '__main__':
  164. import sys
  165. def f():
  166. if len(sys.argv) > 1:
  167. d = {}
  168. else:
  169. d = Dict()
  170. class A(object):
  171. pass
  172. for i in range(10000000):
  173. d[i] = A()
  174. f()
  175.