ppix.php 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. <?php
  2. class Ppix {
  3. private $handle;
  4. private $locations;
  5. public function __construct($handle) {
  6. $this->handle = $handle;
  7. fseek($handle, 0);
  8. if(fread($handle, 5) != "PPIX\x00") {
  9. throw new Exception("File did not start with PPIX magic number", 1);
  10. }
  11. $this->locations = unpack("Vpub/Vcol/Vtag/Vtre", fread($handle, 16));
  12. }
  13. public function get_publication_count() {
  14. fseek($this->handle, $this->locations["pub"]);
  15. return unpack("V", fread($this->handle, 4))[1];
  16. }
  17. public function get_publication_by_id($id) {
  18. $location = $this->locations["pub"] + 4 + ($id * 6);
  19. fseek($this->handle, $location);
  20. $string_info = unpack("Vloc/vlen", fread($this->handle, 6));
  21. fseek($this->handle, $string_info["loc"]);
  22. return fread($this->handle, $string_info["len"]);
  23. }
  24. public function get_collection_by_id($id) {
  25. $location = $this->locations["col"] + ($id * 6);
  26. fseek($this->handle, $location);
  27. $collection_info = unpack("Vloc/vcount", fread($this->handle, 6));
  28. fseek($this->handle, $collection_info["loc"]);
  29. $values = array();
  30. for ($i=0; $i < $collection_info["count"]; $i++) {
  31. $values[$i] = unpack("V", fread($this->handle, 4))[1];
  32. }
  33. return $values;
  34. }
  35. public function get_tags_count() {
  36. fseek($this->handle, $this->locations["tag"]);
  37. return unpack("v", fread($this->handle, 2))[1];
  38. }
  39. public function get_tags() {
  40. $count = $this->get_tags_count();
  41. $tags = array();
  42. for ($i=0; $i < $count; $i++) {
  43. $tag_data = unpack("Cstrlen/Vcolid", fread($this->handle, 5));
  44. if($tag_data["strlen"] == 0) {
  45. continue;
  46. }
  47. $tag = fread($this->handle, $tag_data["strlen"]);
  48. $tags[$tag] = $tag_data["colid"];
  49. }
  50. return $tags;
  51. }
  52. public function find_word_matches($word) {
  53. $binarr = $this->string_to_bin_arr($word);
  54. $node = $this->read_tree_node($this->locations["tre"]);
  55. foreach ($binarr as $bit) {
  56. if(!$bit and $node["zero"] != 0) {
  57. $node = $this->read_tree_node($node["zero"]);
  58. }
  59. else if($bit and $node["one"] != 0) {
  60. $node = $this->read_tree_node($node["one"]);
  61. }
  62. else {
  63. return null;
  64. }
  65. }
  66. if($node["has"] == 255) {
  67. return $node["col"];
  68. }
  69. return null;
  70. }
  71. private function find_partial_matches($bin_word) {
  72. $binarr = $this->string_to_bin_arr($word);
  73. $node = $this->read_tree_node($this->locations["tre"]);
  74. $built_key = array();
  75. foreach ($binarr as $bit) {
  76. if(!$bit and $node["zero"] != 0) {
  77. array_push($built_key, $bit);
  78. $node = $this->read_tree_node($node["zero"]);
  79. }
  80. else if($bit and $node["one"] != 0) {
  81. array_push($built_key, $bit);
  82. $node = $this->read_tree_node($node["one"]);
  83. }
  84. }
  85. if($node["has"] == 255) {
  86. return $node["col"];
  87. }
  88. return null;
  89. }
  90. private function get_subkeys($key, $node) {
  91. $subkeys = array();
  92. if($node["has"] == 255) {
  93. array_push($subkeys, $key);
  94. }
  95. if($node["one"] != 0) {
  96. $nkey = array_merge($key);
  97. array_push($nkey, true);
  98. array_merge($subkeys, $this->get_subkeys($nkey, $node));
  99. }
  100. if($node["zero"] != 0) {
  101. $nkey = array_merge($key);
  102. array_push($nkey, false);
  103. array_merge($subkeys, $this->get_subkeys($nkey, $node));
  104. }
  105. return $subkeys;
  106. }
  107. private function read_tree_node($location) {
  108. fseek($this->handle, $location);
  109. $data = unpack("Vzero/Chas/Vcol/Vone", fread($this->handle, 13));
  110. return $data;
  111. }
  112. private function string_to_bin_arr($string) {
  113. $data = array();
  114. $refbits = array(1,2,4,8,16,32,64,128);
  115. for ($i=0; $i < strlen($string) * 8; $i++) {
  116. $char = ord($string[intdiv($i,8)]);
  117. $ref = $refbits[$i%8];
  118. $data[$i] = ($char & $ref) == $ref;
  119. }
  120. return $data;
  121. }
  122. public function do_search($query) {
  123. $words = explode(" ", $query);
  124. $results = null;
  125. foreach($words as $word) {
  126. $col = $this->find_word_matches($word);
  127. if($col == null){
  128. return array();
  129. }
  130. $col = $this->get_collection_by_id($col);
  131. if($results == null){
  132. $results = $col;
  133. }
  134. else {
  135. $results = array_intersect($results, $col);
  136. }
  137. if(count($results) == 0) {
  138. return $results;
  139. }
  140. }
  141. return array_values($results);
  142. }
  143. }
  144. ?>