ppix.py 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. import struct
  2. import typing
  3. class Ppix:
  4. def __init__(self, stream):
  5. self.__stream = stream
  6. stream.seek(0)
  7. if(stream.read(5) != b"PPIX\x00"):
  8. raise Exception("Stream does not begin with PPIX magic number")
  9. self.__publication_index_location, self.__collection_index_location, self.__tag_index_location, self.__word_tree_root_location = struct.unpack("<IIII", stream.read(16))
  10. def get_publications_count(self) -> int:
  11. self.__stream.seek(self.__publication_index_location)
  12. return struct.unpack("<I", self.__stream.read(4))[0]
  13. def get_publication_by_id(self, id) -> str:
  14. position = self.__publication_index_location + 4 + (id * 6)
  15. self.__stream.seek(position)
  16. string_location, string_length = struct.unpack("<IH", self.__stream.read(6))
  17. self.__stream.seek(string_location)
  18. return self.__stream.read(string_length).decode("utf-8")
  19. def get_collection_by_id(self, id) -> typing.List[int]:
  20. position = self.__collection_index_location + (id * 6)
  21. self.__stream.seek(position)
  22. collection_location, collection_item_count = struct.unpack("<IH", self.__stream.read(6))
  23. self.__stream.seek(collection_location)
  24. return struct.unpack("<{}".format("I"*collection_item_count), self.__stream.read(collection_item_count * 4))
  25. def get_tags_count(self) -> int:
  26. self.__stream.seek(self.__tag_index_location)
  27. return struct.unpack("<H", self.__stream.read(2))[0]
  28. def get_tags(self):
  29. count = self.get_tags_count()
  30. for i in range(count):
  31. tag_string_length, collection_id = struct.unpack("<BI", self.__stream.read(5))
  32. yield (self.__stream.read(tag_string_length).decode('utf-8'), collection_id)
  33. def find_word_matches(self, word):
  34. node = self.__get_word_node_from_string(word)
  35. return node[2] if node != None else None
  36. def __get_word_node_from_string(self, word):
  37. bin_string = self.__string_to_bin(word)
  38. node = self.__read_tree_node(self.__word_tree_root_location)
  39. for bit in bin_string:
  40. if(bit == "0" and node[0] != 0):
  41. node = self.__read_tree_node(node[0])
  42. elif(bit == "1" and node[1] != 0):
  43. node = self.__read_tree_node(node[1])
  44. else:
  45. return None
  46. return node
  47. def __read_tree_node(self, position):
  48. self.__stream.seek(position)
  49. c0, has_col, col, c1 = struct.unpack("<IBII", self.__stream.read(13))
  50. if(has_col != 255):
  51. col == None
  52. return (c0, c1, col)
  53. def __string_to_bin(self, string):
  54. data = string.encode("utf-8")
  55. array = []
  56. for byte in data:
  57. for i in [1,2,4,8,16,32,64,128]:
  58. array.append(byte & i == i)
  59. return "".join(("1" if x else "0" for x in array))