read_file($path);
        $index = count($this->pubs);
        // Store the path with everything up to the first "/" removed, together with $ppub.
        array_push($this->pubs, [explode("/", $path, 2)[1], $ppub]);

        // Add this publication to the collection of each space-separated tag,
        // creating a new collection the first time a tag is seen.
        $tags = $ppub->metadata["tags"];
        if($tags != null) {
            foreach(explode(" ", $tags) as $tag) {
                if(array_key_exists($tag, $this->tags)) {
                    array_push($this->collections[$this->tags[$tag]], $index);
                } else {
                    $collection_index = count($this->collections);
                    array_push($this->collections, [$index,]);
                    $this->tags[$tag] = $collection_index;
                }
            }
        }

        // Gather the distinct words of the title, description and author.
        $words = array();
        $words = $this->word_array_collect($words, $ppub->metadata["title"]);
        $words = $this->word_array_collect($words, $ppub->metadata["description"]);
        $words = $this->word_array_collect($words, $ppub->metadata["author"]);

        // asset_list[1] is treated as the publication's default asset.
        $default_asset = $ppub->asset_list[1];
        error_log("Default asset: " . $default_asset->mimetype);

        // Textual default assets are indexed in full.
        if(substr($default_asset->mimetype, 0, 5) == "text/") {
            $words = $this->word_array_collect($words, $ppub->read_asset($default_asset));
        }

        // For PPVM assets, index the asset named by the PPVM "description" metadata entry.
        if($default_asset->mimetype == "application/x-ppvm") {
            include_once("ppvm.php");
            $ppvm = new Ppvm();
            $ppvm->from_string($ppub->read_asset($default_asset));
            $descripton = $ppub->read_asset($ppub->asset_index[$ppvm->metadata["description"]]);
            $words = $this->word_array_collect($words, $descripton);
        }

        // Add this publication to the collection of each word, creating a new
        // collection the first time a word is seen.
        foreach($words as $word => $_) {
            if(array_key_exists($word, $this->words)) {
                array_push($this->collections[$this->words[$word]], $index);
            } else {
                $collection_index = count($this->collections);
                array_push($this->collections, [$index,]);
                $this->words[$word] = $collection_index;
            }
        }
    }

    /**
     * Serialise the whole index to its binary representation.
     *
     * Layout: the 5-byte magic "PPIX\0", four little-endian 32-bit offsets
     * (publication index, collection index, tag index, word tree), followed
     * by those four sections in that order.
     *
     * @return string the binary index
     */
    public function serialise() {
        $str = "PPIX\x00";

        // The first section starts after the header: 5 magic bytes + 4 * 4 offset bytes.
        $start = 21;

        $publication_index_start = $start;
        $publication_index = $this->serialise_publication_index($start);
        $start += strlen($publication_index);

        $collection_index_start = $start;
        $collection_index = $this->serialise_collections($start);
        $start += strlen($collection_index);

        $tag_index_start = $start;
        $tag_index = $this->serialise_tags();
        $start += strlen($tag_index);

        // After the tag index, $start is the offset of the word tree.
        $str .= pack("VVVV", $publication_index_start, $collection_index_start, $tag_index_start, $start);
        $str .= $publication_index;
        $str .= $collection_index;
        $str .= $tag_index;
        $str .= $this->serialise_word_tree($start);
        return $str;
    }

    /**
     * Serialise the publication table.
     *
     * Output: a 32-bit publication count, one 6-byte entry per publication
     * (32-bit absolute offset of its name, 16-bit name length), then the
     * names themselves, concatenated.
     *
     * @param int $start_pos absolute offset at which this section will be written
     * @return string the serialised section
     */
    private function serialise_publication_index($start_pos) {
        $data = pack("V", count($this->pubs));

        // Names begin after the 4-byte count and the 6-byte entries.
        $string_data_start = $start_pos + 4 + (count($this->pubs) * 6);
        $string_data = "";

        foreach($this->pubs as $pub) {
            $name = $pub[0];
            $data .= pack("Vv", $string_data_start + strlen($string_data), strlen($name));
            $string_data .= $name;
        }

        return $data . $string_data;
    }

    /**
     * Serialise the collection table.
     *
     * Output: one 6-byte entry per collection (32-bit absolute offset of its
     * members, 16-bit member count), then the members of every collection as
     * 32-bit publication ids.
     *
     * @param int $start_pos absolute offset at which this section will be written
     * @return string the serialised section
     */
    private function serialise_collections($start_pos) {
        $index_data = "";
        $collection_data_start = $start_pos + (count($this->collections) * 6);
        $collection_data = "";

        foreach($this->collections as $dup_col) {
            // A publication may have been pushed to a collection more than once.
            $col = array_unique($dup_col);
            $index_data .= pack("Vv", $collection_data_start + strlen($collection_data), count($col));
            foreach($col as $pub_id) {
                $collection_data .= pack("V", $pub_id);
            }
        }

        return $index_data . $collection_data;
    }

    /**
     * Serialise the tag table.
     *
     * Output: a 16-bit tag count, then for each tag an 8-bit name length,
     * the 32-bit id of its collection, and the tag name.
     *
     * @return string the serialised section
     */
    private function serialise_tags() {
        $data = pack("v", count($this->tags));

        foreach($this->tags as $tag => $col_id) {
            $data .= pack("CV", strlen($tag), $col_id);
            $data .= $tag;
        }

        return $data;
    }

    /**
     * Serialise the indexed words as a binary tree with one node per bit.
     *
     * Each word is converted to its bit string; walking that string from the
     * root (next_0 / next_1) leads to the node carrying the word's collection
     * id. Nodes are fixed-size (WordBit::SIZE) and written back to back, the
     * root first.
     *
     * @param int $start_position absolute offset at which the root node will be written
     * @return string the serialised tree
     */
    function serialise_word_tree($start_position) {
        $words = array();
        foreach ($this->words as $k => $v) {
            array_push($words, [$this->str_to_bool_array($k), $v]);
        }

        // Order the words by bit string; nodes are laid out in the order in
        // which they are first created below.
        usort($words, function($a, $b) {
            return $a[0][0] <=> $b[0][0];
        });

        $root = new WordBit();
        // Maps every bit-string prefix seen so far to its node.
        $nodes = ["" => $root];

        foreach ($words as $word) {
            $bit_string = $word[0][0];
            $bits = $word[0][1];
            $bit_count = strlen($bit_string);

            $last_bit = null;
            for ($i = 0; $i < $bit_count; $i++) {
                $key = substr($bit_string, 0, $i + 1);
                if (array_key_exists($key, $nodes)) {
                    $last_bit = $nodes[$key];
                    continue;
                }

                // New prefix: create its node and hang it off the parent prefix.
                $last_bit = new WordBit();
                $past_key = substr($bit_string, 0, $i);
                if ($bits[$i]) {
                    $nodes[$past_key]->next_1 = $last_bit;
                } else {
                    $nodes[$past_key]->next_0 = $last_bit;
                }
                $nodes[$key] = $last_bit;
            }

            // The node reached by the word's last bit carries its collection.
            $last_bit->collection = $word[1];
        }

        // Assign consecutive positions: the root first, then every other node.
        $root->position = $start_position;
        $node_array = [$root];
        unset($nodes[""]);

        $counter = $root->position + WordBit::SIZE;
        foreach ($nodes as $node) {
            $node->position = $counter;
            array_push($node_array, $node);
            $counter += WordBit::SIZE;
        }

        $output = "";
        foreach ($node_array as $node) {
            $output .= $node->serialise();
        }

        return $output;
    }

    /**
     * Convert a string to its bits, least significant bit of each byte first.
     *
     * @param string $string the text to convert
     * @return array [bits as a string of "0"/"1" characters, bits as a list of booleans]
     */
    private function str_to_bool_array($string) {
        $array = array();
        foreach(str_split($string) as $char) {
            $byte = ord($char);
            foreach([1, 2, 4, 8, 16, 32, 64, 128] as $mask) {
                array_push($array, ($byte & $mask) == $mask);
            }
        }

        $bit_string = "";
        foreach($array as $bit) {
            $bit_string .= $bit ? 1 : 0;
        }

        return [$bit_string, $array];
    }

    /**
     * Add the distinct, lower-cased words of a text to a word set.
     *
     * Every character that is neither alphanumeric nor whitespace is removed
     * before the text is split into words.
     *
     * @param array $array word set collected so far (word => 1)
     * @param string|null $word_soup text to take words from; may be null or empty
     * @return array the word set including the words of $word_soup
     */
    private function word_array_collect($array, $word_soup) {
        // Nothing to add: hand the set back unchanged. A bare "return;" here
        // would make the caller lose every word collected so far.
        if($word_soup == null) {
            return $array;
        }

        $stripped = preg_replace("/[^[:alnum:][:space:]]/u", '', $word_soup);
        if($stripped === null) {
            // preg_replace() signals failure (e.g. invalid UTF-8 under /u) with null.
            return $array;
        }

        // Split on any run of whitespace: the pattern above keeps newlines and
        // tabs, so splitting on " " alone would glue such words together.
        $words = preg_split("/[[:space:]]+/", $stripped, -1, PREG_SPLIT_NO_EMPTY);
        if($words === false) {
            return $array;
        }

        foreach($words as $word) {
            $array[strtolower($word)] = 1;
        }

        return $array;
    }
}

/**
 * One node of the serialised word tree.
 *
 * A node links to the nodes for a following 0 bit and 1 bit and may carry the
 * id of the collection of the word that ends at this node.
 */
class WordBit {
    /** Serialised size of a node in bytes: 4 + 1 + 4 + 4, see serialise(). */
    public const SIZE = 13;

    /** Node for a following 0 bit, or null. */
    public $next_0;
    /** Node for a following 1 bit, or null. */
    public $next_1;
    /** Collection id of the word ending at this node, or null. */
    public $collection;
    /** Absolute offset of this node in the serialised index. */
    public $position;

    /**
     * Serialise the node as: 32-bit position of next_0 (0 if absent), 8-bit
     * flag (255 when a collection is set, 0 otherwise), 32-bit collection id
     * (0 if absent), 32-bit position of next_1 (0 if absent).
     *
     * @return string WordBit::SIZE bytes
     */
    public function serialise() {
        $n0 = $this->next_0 === null ? 0 : $this->next_0->position;
        $n1 = $this->next_1 === null ? 0 : $this->next_1->position;

        // Collection id 0 is a valid id, so presence must be decided by a
        // strict null check rather than by comparing the id to 0.
        $has_col = $this->collection === null ? 0 : 255;
        $col = $this->collection === null ? 0 : $this->collection;

        return pack("VCVV", $n0, $has_col, $col, $n1);
    }
}

/**
 * Build PUBLICATION_DIR/lib.ppix from the publications listed in
 * PUBLICATION_DIR/collection.ppcl, adding them in reverse listing order.
 */
function generate_ppix_from_ppcl() {
    include_once("config.php");
    include_once("ppcl.php");

    $ppcl = new Ppcl();
    $ppcl->from_string(file_get_contents(PUBLICATION_DIR . "/collection.ppcl"));
    $pubs = array_reverse($ppcl->publications);

    $ppix = new PpixGenerator();
    foreach($pubs as $pub) {
        $ppix->add_publication(PUBLICATION_DIR . "/" . $pub->name);
    }

    file_put_contents(PUBLICATION_DIR . "/lib.ppix", $ppix->serialise());
}
?>