ppix.php 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. <?php
  2. class Ppix {
  3. private $handle;
  4. private $locations;
  5. public function __construct($handle) {
  6. $this->handle = $handle;
  7. fseek($handle, 0);
  8. if(fread($handle, 5) != "PPIX\x00") {
  9. throw new Exception("File did not start with PPIX magic number", 1);
  10. }
  11. $this->locations = unpack("Vpub/Vcol/Vtag/Vtre", fread($handle, 16));
  12. }
  13. public function get_publication_count() {
  14. fseek($this->handle, $this->locations["pub"]);
  15. return unpack("V", fread($this->handle, 4))[1];
  16. }
  17. public function get_publication_by_id($id) {
  18. error_log("Get by id " . $id);
  19. $location = $this->locations["pub"] + 4 + ($id * 6);
  20. error_log("Location " . $location);
  21. fseek($this->handle, $location);
  22. $string_info = unpack("Vloc/vlen", fread($this->handle, 6));
  23. error_log("String loc/len " . $string_info["loc"] . "/" . $string_info["len"]);
  24. fseek($this->handle, $string_info["loc"]);
  25. return fread($this->handle, $string_info["len"]);
  26. }
  27. public function get_collection_by_id($id) {
  28. $location = $this->locations["col"] + ($id * 6);
  29. fseek($this->handle, $location);
  30. $collection_info = unpack("Vloc/vcount", fread($this->handle, 6));
  31. fseek($this->handle, $collection_info["loc"]);
  32. $values = array();
  33. for ($i=0; $i < $collection_info["count"]; $i++) {
  34. $values[$i] = unpack("V", fread($this->handle, 4))[1];
  35. }
  36. return $values;
  37. }
  38. public function get_tags_count() {
  39. fseek($this->handle, $this->locations["tag"]);
  40. return unpack("v", fread($this->handle, 2))[1];
  41. }
  42. public function get_tags() {
  43. $count = $this->get_tags_count();
  44. $tags = array();
  45. for ($i=0; $i < $count; $i++) {
  46. $tag_data = unpack("Cstrlen/Vcolid", fread($this->handle, 5));
  47. if($tag_data["strlen"] == 0) {
  48. continue;
  49. }
  50. $tag = fread($this->handle, $tag_data["strlen"]);
  51. $tags[$tag] = $tag_data["colid"];
  52. }
  53. return $tags;
  54. }
  55. public function find_word_matches($word) {
  56. $binarr = $this->string_to_bin_arr($word);
  57. $node = $this->read_tree_node($this->locations["tre"]);
  58. foreach ($binarr as $bit) {
  59. if(!$bit and $node["zero"] != 0) {
  60. $node = $this->read_tree_node($node["zero"]);
  61. }
  62. else if($bit and $node["one"] != 0) {
  63. $node = $this->read_tree_node($node["one"]);
  64. }
  65. else {
  66. return null;
  67. }
  68. }
  69. if($node["has"] == 255) {
  70. return $node["col"];
  71. }
  72. return null;
  73. }
  74. private function find_partial_matches($bin_word) {
  75. $binarr = $this->string_to_bin_arr($word);
  76. $node = $this->read_tree_node($this->locations["tre"]);
  77. $built_key = array();
  78. foreach ($binarr as $bit) {
  79. if(!$bit and $node["zero"] != 0) {
  80. array_push($built_key, $bit);
  81. $node = $this->read_tree_node($node["zero"]);
  82. }
  83. else if($bit and $node["one"] != 0) {
  84. array_push($built_key, $bit);
  85. $node = $this->read_tree_node($node["one"]);
  86. }
  87. }
  88. if($node["has"] == 255) {
  89. return $node["col"];
  90. }
  91. return null;
  92. }
  93. private function get_subkeys($key, $node) {
  94. $subkeys = array();
  95. if($node["has"] == 255) {
  96. array_push($subkeys, $key);
  97. }
  98. if($node["one"] != 0) {
  99. $nkey = array_merge($key);
  100. array_push($nkey, true);
  101. array_merge($subkeys, $this->get_subkeys($nkey, $node));
  102. }
  103. if($node["zero"] != 0) {
  104. $nkey = array_merge($key);
  105. array_push($nkey, false);
  106. array_merge($subkeys, $this->get_subkeys($nkey, $node));
  107. }
  108. return $subkeys;
  109. }
  110. private function read_tree_node($location) {
  111. fseek($this->handle, $location);
  112. $data = unpack("Vzero/Chas/Vcol/Vone", fread($this->handle, 13));
  113. return $data;
  114. }
  115. private function string_to_bin_arr($string) {
  116. $data = array();
  117. $refbits = array(1,2,4,8,16,32,64,128);
  118. for ($i=0; $i < strlen($string) * 8; $i++) {
  119. $char = ord($string[intdiv($i,8)]);
  120. $ref = $refbits[$i%8];
  121. $data[$i] = ($char & $ref) == $ref;
  122. }
  123. return $data;
  124. }
  125. public function do_search($query) {
  126. $words = explode(" ", $query);
  127. $results = null;
  128. foreach($words as $word) {
  129. $col = $this->find_word_matches($word);
  130. if($col == null){
  131. return array();
  132. }
  133. $col = $this->get_collection_by_id($col);
  134. if($results == null){
  135. $results = $col;
  136. }
  137. else {
  138. $results = array_intersect($results, $col);
  139. }
  140. if(count($results) == 0) {
  141. return $results;
  142. }
  143. }
  144. return array_values($results);
  145. }
  146. }
  147. ?>