#
#
# Mark Grandi - Oct 14, 2014
#

import argparse
import pprint
import sys
import zlib

HEADER = "B+Tree Graph Index 2"
NODEREF = "node_ref_lists="
KEYELEMENTS = "key_elements="
LEN = "len="
ROWLENGTH = "row_lengths="

# The file is consumed in fixed-size pages: the header plus the first
# compressed node are read from the first PAGE_SIZE bytes, and every
# following read takes the next PAGE_SIZE bytes.
PAGE_SIZE = 4096


def _read_until_newline(fileObj):
    '''read one header line from a binary file object

    Consumes bytes up to and including the next newline.

    @param fileObj - a file object opened in binary mode
    @return the bytes before the newline, decoded as utf-8
    '''

    line = fileObj.readline()

    # readline() only returns data without a trailing newline when it hit
    # the end of the file, which means the header is truncated
    if not line.endswith(b"\n"):
        sys.exit("Unexpected end of file while reading a header line")

    return line[:-1].decode("utf-8")


def _expect_string(fileObj, expectedString):
    '''read len(expectedString) bytes and exit the program if they don't match

    @param fileObj - a file object opened in binary mode
    @param expectedString - the text that must come next in the file
    '''

    # errors="replace" so that arbitrary binary input produces the mismatch
    # message below instead of a UnicodeDecodeError traceback
    actual = fileObj.read(len(expectedString)).decode("utf-8", errors="replace")

    if actual != expectedString:
        sys.exit("String incorrect, expected {0} but got {1}".format(expectedString, actual))


def printOutIndexKeys(args):
    '''print out bzr index keys

    For every file in args.infileList this checks and prints the header
    fields, then decompresses and pretty-prints each page, and finally
    prints how many leaf / internal nodes were seen.

    Calls sys.exit() with a message if the header is malformed or truncated.
    The file objects are read but not closed; that is left to the caller.

    @param args - the namespace object we get from argparse.parse_args()
    '''

    for iterFile in args.infileList:

        _expect_string(iterFile, HEADER)
        _expect_string(iterFile, "\n")

        _expect_string(iterFile, NODEREF)
        noderef_val = int(_read_until_newline(iterFile))

        _expect_string(iterFile, KEYELEMENTS)
        keyelements_val = int(_read_until_newline(iterFile))

        _expect_string(iterFile, LEN)
        len_val = int(_read_until_newline(iterFile))

        _expect_string(iterFile, ROWLENGTH)
        rowlength_val = _read_until_newline(iterFile)

        print("noderef: {0}, keyelements: {1}, len: {2}, rowlength: {3}".format(
            noderef_val, keyelements_val, len_val, rowlength_val))

        # the first page is shorter than the rest because the header we just
        # consumed is counted as part of it
        start = iterFile.tell()
        finish = PAGE_SIZE

        numLeaf = 0
        numInternal = 0
        totalKeysInInternal = 0

        while True:
            print("######################################")

            compressed = iterFile.read(finish - start)
            if not compressed:
                print("eof")
                break

            print("read from {0} to {1}".format(start, iterFile.tell()))

            # whatever follows the end of the zlib stream inside the page
            # (padding) is ignored by zlib.decompress
            splitList = zlib.decompress(compressed).split(b'\n')

            if splitList[0] == b"type=internal":
                numInternal += 1
                # remove type=, offset= and empty string at end
                totalKeysInInternal += len(splitList) - 3
            if splitList[0] == b"type=leaf":
                numLeaf += 1

            pprint.pprint(splitList)

            start = iterFile.tell()
            finish += PAGE_SIZE

        print("done, numLeaf: {0}, numInternal: {1}, totalKeysInInternal: {2}".format(
            numLeaf, numInternal, totalKeysInInternal))


def main():
    '''parse the command line and dump every index file given on it'''

    parser = argparse.ArgumentParser(description="print out bzr index keys",
        epilog="Copyright Oct 14, 2014 - Mark Grandi")

    parser.add_argument('infileList', type=argparse.FileType('rb'), nargs="+",
        help="bzr index to read the keys from")

    args = parser.parse_args()

    try:
        printOutIndexKeys(args)
    finally:
        # argparse.FileType opened these for us, so we have to close them
        for iterFile in args.infileList:
            iterFile.close()


if __name__ == "__main__":
    # if we are being run as a real program
    main()