# spacepaste
#
#
# Mark Grandi - Oct 14, 2014
#
import argparse
import pprint
import sys
import zlib
  7. HEADER="B+Tree Graph Index 2"
  8. NODEREF = "node_ref_lists="
  9. KEYELEMENTS = "key_elements="
  10. LEN = "len="
  11. ROWLENGTH = "row_lengths="
  12. def printOutIndexKeys(args):
  13. '''print out bzr index keys
  14. @param args - the namespace object we get from argparse.parse_args()
  15. '''
  16. def readUntilNewline(fileObj):
  17. ba = bytearray()
  18. while True:
  19. tmp = fileObj.read(1)
  20. if tmp == "\n".encode("utf-8"):
  21. return ba.decode("utf-8")
  22. else:
  23. ba.append(ord(tmp))
  24. def testString(fileObj, expectedString):
  25. test = fileObj.read(len(expectedString))
  26. if not test.decode("utf-8") == expectedString:
  27. sys.exit("String incorrect, expected {} but got {}".format(expectedString, test.decode("utf-8")))
  28. for iterFile in args.infileList:
  29. testString(iterFile, HEADER)
  30. testString(iterFile, "\n")
  31. testString(iterFile, NODEREF)
  32. noderef_val = int(readUntilNewline(iterFile))
  33. testString(iterFile, KEYELEMENTS)
  34. keyelements_val = int(readUntilNewline(iterFile))
  35. testString(iterFile, LEN)
  36. len_val = int(readUntilNewline(iterFile))
  37. testString(iterFile, ROWLENGTH)
  38. rowlength_val = readUntilNewline(iterFile)
  39. print("noderef: {}, keyelements: {}, len: {}, rowlength: {}".format(noderef_val, keyelements_val, len_val, rowlength_val))
  40. start = iterFile.tell()
  41. finish = 4096
  42. numLeaf = 0
  43. numInternal = 0
  44. totalKeysInInternal = 0
  45. while True:
  46. print("######################################")
  47. uncomp = iterFile.read(finish-start)
  48. if not uncomp:
  49. print("eof")
  50. break
  51. print("read from {} to {}".format(start, iterFile.tell()))
  52. decomp = zlib.decompress(uncomp)
  53. splitList = decomp.split(b'\n')
  54. if splitList[0].decode("utf-8") == "type=internal":
  55. numInternal += 1
  56. totalKeysInInternal += len(splitList) - 3 # remove type=, offset= and empty string at end
  57. if splitList[0].decode("utf-8") == "type=leaf":
  58. numLeaf += 1
  59. pprint.pprint(splitList)
  60. start = iterFile.tell()
  61. finish += 4096
  62. print("done, numLeaf: {}, numInternal: {}, totalKeysInInternal: {}".format(numLeaf, numInternal, totalKeysInInternal))
  63. if __name__ == "__main__":
  64. # if we are being run as a real program
  65. parser = argparse.ArgumentParser(description="print out bzr index keys",
  66. epilog="Copyright Oct 14, 2014 - Mark Grandi")
  67. parser.add_argument('infileList', type=argparse.FileType('rb'), nargs="+", help="bzr index to read the keys from")
  68. printOutIndexKeys(parser.parse_args())
  69.