ppix.php 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. <?php
  /**
   * Read-only accessor for a "PPIX" binary index stored in a seekable stream.
   *
   * Layout as consumed by this class (every integer is little-endian):
   *   - bytes 0-4   magic string "PPIX\0"
   *   - bytes 5-20  four uint32 offsets, in order: "pub", "col", "tag", "tre"
   *   - "pub"       uint32 count, then 6-byte entries (uint32 string offset, uint16 length)
   *   - "col"       6-byte entries (uint32 data offset, uint16 count); data is `count` uint32 values
   *   - "tag"       uint16 count, then records (uint8 length, uint32 collection id, name bytes)
   *   - "tre"       root of a binary trie made of 13-byte nodes
   *                 (uint32 "zero" child offset, uint8 "has" flag, uint32 collection id,
   *                 uint32 "one" child offset); a child offset of 0 means "no child"
   *
   * Keys are walked through the trie one bit at a time, least-significant bit of each
   * byte first (see string_to_bin_arr()).
   *
   * The class never closes the stream; the caller keeps ownership of the handle.
   */
  class Ppix
  {
      /** Size in bytes of one entry in the publication and collection tables. */
      const ENTRY_SIZE = 6;

      /** Value of a node's "has" byte when a key ends at that node. */
      const KEY_END = 255;

      /** @var resource Seekable stream containing the index. */
      private $handle;

      /** @var array Section offsets keyed "pub", "col", "tag" and "tre". */
      private $locations;

      /**
       * Validate the magic number and load the four section offsets.
       *
       * @param resource $handle Open, seekable stream positioned anywhere.
       *
       * @throws Exception        With code 1 when the stream does not start with the magic string.
       * @throws RuntimeException When the 16-byte offset header is truncated.
       */
      public function __construct($handle)
      {
          $this->handle = $handle;
          fseek($handle, 0);
          // Strict comparison: a false or short read can never equal the magic string.
          if (fread($handle, 5) !== "PPIX\x00") {
              throw new Exception("File did not start with PPIX magic number", 1);
          }
          $this->locations = unpack("Vpub/Vcol/Vtag/Vtre", $this->read_bytes(16));
      }

      /**
       * @return int The uint32 stored at the start of the publication table.
       *
       * @throws RuntimeException When the stream ends early.
       */
      public function get_publication_count()
      {
          return unpack("V", $this->read_at($this->locations["pub"], 4))[1];
      }

      /**
       * Fetch the string belonging to one publication entry.
       *
       * No bounds check is made against get_publication_count(); an out-of-range
       * $id reads whatever bytes happen to sit at the computed offset.
       *
       * @param int $id Zero-based entry index.
       *
       * @return string Raw bytes of the entry ("" when the stored length is 0).
       *
       * @throws RuntimeException When the stream ends early.
       */
      public function get_publication_by_id($id)
      {
          // Entries start right after the 4-byte count header.
          $offset = $this->locations["pub"] + 4 + ($id * self::ENTRY_SIZE);
          $entry = unpack("Vloc/vlen", $this->read_at($offset, self::ENTRY_SIZE));

          return $this->read_at($entry["loc"], $entry["len"]);
      }

      /**
       * Fetch the uint32 values that make up one collection.
       *
       * NOTE(review): unlike the publication table, no count header is skipped
       * here - confirm against the file format that this is intended.
       *
       * @param int $id Zero-based entry index.
       *
       * @return int[] Zero-indexed list of the stored values, in file order.
       *
       * @throws RuntimeException When the stream ends early.
       */
      public function get_collection_by_id($id)
      {
          $offset = $this->locations["col"] + ($id * self::ENTRY_SIZE);
          $entry = unpack("Vloc/vcount", $this->read_at($offset, self::ENTRY_SIZE));
          if ($entry["count"] === 0) {
              return [];
          }

          // One read for the whole run; unpack() keys start at 1, so re-index from 0.
          return array_values(unpack("V*", $this->read_at($entry["loc"], $entry["count"] * 4)));
      }

      /**
       * @return int The uint16 stored at the start of the tag table.
       *
       * @throws RuntimeException When the stream ends early.
       */
      public function get_tags_count()
      {
          return unpack("v", $this->read_at($this->locations["tag"], 2))[1];
      }

      /**
       * Read every tag record.
       *
       * @return array Map of tag name => collection id. A name repeated in the
       *               file keeps the id of its last record.
       *
       * @throws RuntimeException When the stream ends early.
       */
      public function get_tags()
      {
          // get_tags_count() leaves the stream positioned on the first record,
          // so the records are consumed sequentially from here.
          $count = $this->get_tags_count();
          $tags = [];
          for ($i = 0; $i < $count; $i++) {
              $record = unpack("Cstrlen/Vcolid", $this->read_bytes(5));
              $tags[$this->read_bytes($record["strlen"])] = $record["colid"];
          }

          return $tags;
      }

      /**
       * Look up an exact key in the trie.
       *
       * @param string $word Key to look up, compared byte for byte.
       *
       * @return int|null Collection id stored for the key, or null when the key
       *                  is absent. 0 is a valid id, so compare with === null.
       *
       * @throws RuntimeException When the stream ends early.
       */
      public function find_word_matches($word)
      {
          $node = $this->read_tree_node($this->locations["tre"]);
          foreach ($this->string_to_bin_arr($word) as $bit) {
              $node = $this->descend($node, $bit);
              if ($node === null) {
                  return null;
              }
          }

          return $node["has"] === self::KEY_END ? $node["col"] : null;
      }

      /**
       * Follow a key through the trie as far as it goes and report the node reached.
       *
       * Not called anywhere in this class. NOTE(review): the name suggests a
       * prefix search that was never finished (get_subkeys() looks like its
       * other half) - confirm the intended contract before relying on it.
       *
       * @param string|bool[] $bin_word Key as a raw string, or an already expanded bit list.
       *
       * @return int|null Collection id of the deepest node reached when a key
       *                  ends there, otherwise null.
       *
       * @throws RuntimeException When the stream ends early.
       */
      private function find_partial_matches($bin_word)
      {
          $bits = is_array($bin_word) ? $bin_word : $this->string_to_bin_arr($bin_word);
          $node = $this->read_tree_node($this->locations["tre"]);
          foreach ($bits as $bit) {
              $child = $this->descend($node, $bit);
              if ($child === null) {
                  // The path ends here; later bits must not be matched against this node.
                  break;
              }
              $node = $child;
          }

          return $node["has"] === self::KEY_END ? $node["col"] : null;
      }

      /**
       * Collect every key stored at or below a node.
       *
       * @param bool[] $key  Bits already consumed to reach $node.
       * @param array  $node Node as returned by read_tree_node().
       *
       * @return array List of keys, each a list of booleans. The node's own key
       *               comes first, then the "one" subtree, then the "zero" subtree.
       *
       * @throws RuntimeException When the stream ends early.
       */
      private function get_subkeys($key, $node)
      {
          $subkeys = [];
          if ($node["has"] === self::KEY_END) {
              $subkeys[] = $key;
          }
          foreach ([true, false] as $bit) {
              $child = $this->descend($node, $bit);
              if ($child !== null) {
                  // Recurse into the child (not the current node) and keep the merged result.
                  $subkeys = array_merge(
                      $subkeys,
                      $this->get_subkeys(array_merge($key, [$bit]), $child)
                  );
              }
          }

          return $subkeys;
      }

      /**
       * Load the child of a node for one key bit.
       *
       * @param array $node Node as returned by read_tree_node().
       * @param bool  $bit  True selects the "one" child, false the "zero" child.
       *
       * @return array|null The child node, or null when that branch is absent.
       *
       * @throws RuntimeException When the stream ends early.
       */
      private function descend($node, $bit)
      {
          $offset = $bit ? $node["one"] : $node["zero"];

          return $offset === 0 ? null : $this->read_tree_node($offset);
      }

      /**
       * Read one 13-byte trie node.
       *
       * @param int $location Absolute offset of the node.
       *
       * @return array Keys "zero", "has", "col" and "one".
       *
       * @throws RuntimeException When the stream ends early.
       */
      private function read_tree_node($location)
      {
          return unpack("Vzero/Chas/Vcol/Vone", $this->read_at($location, 13));
      }

      /**
       * Expand a string into its bits, least-significant bit of each byte first.
       *
       * @param string $string Raw bytes.
       *
       * @return bool[] Eight booleans per input byte.
       */
      private function string_to_bin_arr($string)
      {
          $string = (string) $string;
          $length = strlen($string);
          $bits = [];
          for ($i = 0; $i < $length; $i++) {
              $byte = ord($string[$i]);
              for ($shift = 0; $shift < 8; $shift++) {
                  $bits[] = (($byte >> $shift) & 1) === 1;
              }
          }

          return $bits;
      }

      /**
       * Return the values common to the collections of every word in a query.
       *
       * Words are separated by single spaces; empty tokens produced by leading,
       * trailing or repeated spaces are ignored.
       *
       * @param string $query Space-separated words.
       *
       * @return int[] Zero-indexed list of values present in every word's
       *               collection; empty when any word is unknown or the query
       *               contains no words.
       *
       * @throws RuntimeException When the stream ends early.
       */
      public function do_search($query)
      {
          $results = null;
          foreach (explode(" ", $query) as $word) {
              if ($word === "") {
                  continue;
              }
              $col = $this->find_word_matches($word);
              // Strict check: collection id 0 is a real match, not "not found".
              if ($col === null) {
                  return [];
              }
              $members = $this->get_collection_by_id($col);
              $results = $results === null ? $members : array_intersect($results, $members);
              if (count($results) === 0) {
                  return [];
              }
          }

          // array_intersect() preserves keys, so re-index before returning.
          return $results === null ? [] : array_values($results);
      }

      /**
       * Seek to an absolute offset and read an exact number of bytes.
       *
       * @param int $location Absolute offset in the stream.
       * @param int $length   Number of bytes wanted; 0 yields "".
       *
       * @return string Exactly $length bytes.
       *
       * @throws RuntimeException When the seek fails or the stream ends early.
       */
      private function read_at($location, $length)
      {
          if (fseek($this->handle, $location) !== 0) {
              throw new RuntimeException("Unable to seek to offset {$location} in PPIX data");
          }

          return $this->read_bytes($length);
      }

      /**
       * Read an exact number of bytes from the current stream position.
       *
       * @param int $length Number of bytes wanted; 0 yields "" without touching the stream.
       *
       * @return string Exactly $length bytes.
       *
       * @throws RuntimeException When the stream ends before $length bytes were read.
       */
      private function read_bytes($length)
      {
          $data = "";
          // fread() rejects a length of 0 and may return less than asked for,
          // so keep reading until the request is satisfied or nothing more arrives.
          while (strlen($data) < $length) {
              $chunk = fread($this->handle, $length - strlen($data));
              if ($chunk === false || $chunk === "") {
                  throw new RuntimeException("Unexpected end of PPIX data");
              }
              $data .= $chunk;
          }

          return $data;
      }
  }
  141. ?>