# spacepaste

#
#
# Mark Grandi - Oct 14, 2014
#
import argparse
import pprint
import sys
import zlib
  7. HEADER="B+Tree Graph Index 2"
  8. NODEREF = "node_ref_lists="
  9. KEYELEMENTS = "key_elements="
  10. LEN = "len="
  11. ROWLENGTH = "row_lengths="
  12. def printOutIndexKeys(args):
  13. '''print out bzr index keys
  14. @param args - the namespace object we get from argparse.parse_args()
  15. '''
  16. import pdb;pdb.set_trace()
  17. def readUntilNewline(fileObj):
  18. ba = bytearray()
  19. while True:
  20. tmp = fileObj.read(1)
  21. if tmp == "\n".encode("utf-8"):
  22. return ba.decode("utf-8")
  23. else:
  24. ba.append(ord(tmp))
  25. def testString(fileObj, expectedString):
  26. test = fileObj.read(len(expectedString))
  27. if not test.decode("utf-8") == expectedString:
  28. sys.exit("String incorrect, expected {0} but got {1}".format(expectedString, test.decode("utf-8")))
  29. for iterFile in args.infileList:
  30. testString(iterFile, HEADER)
  31. testString(iterFile, "\n")
  32. testString(iterFile, NODEREF)
  33. noderef_val = int(readUntilNewline(iterFile))
  34. testString(iterFile, KEYELEMENTS)
  35. keyelements_val = int(readUntilNewline(iterFile))
  36. testString(iterFile, LEN)
  37. len_val = int(readUntilNewline(iterFile))
  38. testString(iterFile, ROWLENGTH)
  39. rowlength_val = readUntilNewline(iterFile)
  40. print("noderef: {0}, keyelements: {1}, len: {2}, rowlength: {3}".format(noderef_val, keyelements_val, len_val, rowlength_val))
  41. start = iterFile.tell()
  42. finish = 4096
  43. numLeaf = 0
  44. numInternal = 0
  45. totalKeysInInternal = 0
  46. while True:
  47. print("######################################")
  48. uncomp = iterFile.read(finish-start)
  49. if not uncomp:
  50. print("eof")
  51. break
  52. print("read from {0} to {1}".format(start, iterFile.tell()))
  53. decomp = zlib.decompress(uncomp)
  54. splitList = decomp.split(b'\n')
  55. if splitList[0].decode("utf-8") == "type=internal":
  56. numInternal += 1
  57. totalKeysInInternal += len(splitList) - 3 # remove type=, offset= and empty string at end
  58. if splitList[0].decode("utf-8") == "type=leaf":
  59. numLeaf += 1
  60. pprint.pprint(splitList)
  61. start = iterFile.tell()
  62. finish += 4096
  63. print("done, numLeaf: {0}, numInternal: {1}, totalKeysInInternal: {2}".format(numLeaf, numInternal, totalKeysInInternal))
  64. if __name__ == "__main__":
  65. # if we are being run as a real program
  66. parser = argparse.ArgumentParser(description="print out bzr index keys",
  67. epilog="Copyright Oct 14, 2014 - Mark Grandi")
  68. parser.add_argument('infileList', type=argparse.FileType('rb'), nargs="+", help="bzr index to read the keys from")
  69. printOutIndexKeys(parser.parse_args())
  70.