|
@@ -0,0 +1,271 @@
|
|
|
+<?php
|
|
|
+include_once("ppub.php");
|
|
|
+
|
|
|
/**
 * Builds a PPIX search index over a set of PPUB publications.
 *
 * Publications are registered one at a time with add_publication(); each one
 * is identified by its insertion order. Tags and words found in a publication
 * are mapped to "collections" (lists of publication ids), and serialise()
 * emits the binary index: header, publication index, collection index, tag
 * index and a bitwise word tree.
 */
class PpixGenerator {

    /** @var array<int|string, int> Lower-cased word => index into $collections. */
    private $words = array();

    /** @var array<int, array<int, int>> Lists of publication ids, shared by tags and words. */
    private $collections = array();

    /** @var array<int|string, int> Tag => index into $collections. */
    private $tags = array();

    /** @var array<int, array> [stored name, Ppub] pairs in insertion order. */
    private $pubs = array();

    /**
     * Reads the publication at $path and indexes its tags and words.
     *
     * Words are taken from the title, description and author metadata, from
     * the default asset when its mimetype starts with "text/", and from the
     * description asset referenced by a PPVM default asset.
     *
     * @param string $path Path handed to Ppub::read_file(). The part after
     *                     the first "/" is stored as the publication name.
     * @return void
     */
    public function add_publication($path) {
        $ppub = new Ppub();
        $ppub->read_file($path);

        $index = count($this->pubs);
        // Fall back to the whole path when it contains no "/" to strip.
        $path_parts = explode("/", $path, 2);
        $this->pubs[] = [$path_parts[1] ?? $path_parts[0], $ppub];

        $tags = $ppub->metadata["tags"] ?? null;
        if ($tags !== null && $tags !== "") {
            foreach (explode(" ", $tags) as $tag) {
                // Consecutive spaces would otherwise yield an empty tag.
                if ($tag === "") {
                    continue;
                }
                $this->tags[$tag] = $this->append_to_collection($this->tags[$tag] ?? null, $index);
            }
        }

        $words = array();
        $words = $this->word_array_collect($words, $ppub->metadata["title"] ?? null);
        $words = $this->word_array_collect($words, $ppub->metadata["description"] ?? null);
        $words = $this->word_array_collect($words, $ppub->metadata["author"] ?? null);

        // NOTE(review): index 1 is treated as the default asset; presumably
        // index 0 holds the metadata entry - confirm against Ppub.
        $default_asset = $ppub->asset_list[1] ?? null;
        if ($default_asset !== null) {
            error_log("Default asset: " . $default_asset->mimetype);

            if (str_starts_with($default_asset->mimetype, "text/")) {
                $words = $this->word_array_collect($words, $ppub->read_asset($default_asset));
            }

            if ($default_asset->mimetype == "application/x-ppvm") {
                include_once("ppvm.php");
                $ppvm = new Ppvm();
                $ppvm->from_string($ppub->read_asset($default_asset));

                // Only index the description when the PPVM names one and the
                // publication actually contains that asset.
                $description_name = $ppvm->metadata["description"] ?? null;
                if ($description_name !== null && isset($ppub->asset_index[$description_name])) {
                    $description = $ppub->read_asset($ppub->asset_index[$description_name]);
                    $words = $this->word_array_collect($words, $description);
                }
            }
        }

        foreach (array_keys($words) as $word) {
            $this->words[$word] = $this->append_to_collection($this->words[$word] ?? null, $index);
        }
    }

    /**
     * Adds a publication id to a collection, creating the collection if needed.
     *
     * @param int|null $collection_index Existing collection, or null to create one.
     * @param int      $pub_index        Publication id to append.
     * @return int Index of the collection that now contains $pub_index.
     */
    private function append_to_collection($collection_index, $pub_index) {
        if ($collection_index === null) {
            $collection_index = count($this->collections);
            $this->collections[] = [$pub_index];
        } else {
            $this->collections[$collection_index][] = $pub_index;
        }

        return $collection_index;
    }

    /**
     * Serialises the whole index to its binary form.
     *
     * Layout: "PPIX\0" magic, four little-endian 32-bit offsets (publication
     * index, collection index, tag index, word tree), then those four
     * sections in that order.
     *
     * @return string Binary index data.
     */
    public function serialise() {
        $str = "PPIX\x00";
        // Sections begin right after the magic and the four 4-byte offsets (21 bytes).
        $start = strlen($str) + 16;

        $publication_index_start = $start;
        $publication_index = $this->serialise_publication_index($start);
        $start += strlen($publication_index);

        $collection_index_start = $start;
        $collection_index = $this->serialise_collections($start);
        $start += strlen($collection_index);

        $tag_index_start = $start;
        $tag_index = $this->serialise_tags();
        $start += strlen($tag_index);

        // After the tags, $start is the absolute offset of the word tree root.
        $str .= pack("VVVV", $publication_index_start, $collection_index_start, $tag_index_start, $start);
        $str .= $publication_index;
        $str .= $collection_index;
        $str .= $tag_index;
        $str .= $this->serialise_word_tree($start);

        return $str;
    }

    /**
     * Serialises the publication table: a 32-bit count, one 6-byte entry per
     * publication (32-bit absolute name offset, 16-bit name length), then
     * the concatenated names.
     *
     * @param int $start_pos Absolute offset at which this section is written.
     * @return string
     */
    private function serialise_publication_index($start_pos) {
        $pub_count = count($this->pubs);
        $data = pack("V", $pub_count);
        $string_data_start = $start_pos + 4 + ($pub_count * 6);
        $string_data = "";

        foreach ($this->pubs as $pub) {
            $name = (string) $pub[0];
            $data .= pack("Vv", $string_data_start + strlen($string_data), strlen($name));
            $string_data .= $name;
        }

        return $data . $string_data;
    }

    /**
     * Serialises the collections: one 6-byte entry per collection (32-bit
     * absolute offset of its id list, 16-bit id count), then the id lists as
     * 32-bit publication ids. Duplicate ids within a collection are dropped.
     *
     * @param int $start_pos Absolute offset at which this section is written.
     * @return string
     */
    private function serialise_collections($start_pos) {
        $index_data = "";
        $collection_data_start = $start_pos + (count($this->collections) * 6);
        $collection_data = "";

        foreach ($this->collections as $dup_col) {
            $col = array_unique($dup_col);
            $index_data .= pack("Vv", $collection_data_start + strlen($collection_data), count($col));
            foreach ($col as $pub_id) {
                $collection_data .= pack("V", $pub_id);
            }
        }

        return $index_data . $collection_data;
    }

    /**
     * Serialises the tag table: a 16-bit tag count, then for each tag an
     * 8-bit name length, a 32-bit collection index and the tag bytes.
     *
     * @return string
     */
    private function serialise_tags() {
        $data = pack("v", count($this->tags));

        foreach ($this->tags as $tag => $col_id) {
            // Numeric tags come back from the array as int keys.
            $tag = (string) $tag;
            $data .= pack("CV", strlen($tag), $col_id);
            $data .= $tag;
        }

        return $data;
    }

    /**
     * Serialises the indexed words as a binary tree keyed on the bits of
     * each word (least significant bit of each byte first).
     *
     * The root node is written at $start_position and every further node
     * follows at WordBit::SIZE intervals; nodes link to their children by
     * absolute position.
     *
     * @param int $start_position Absolute offset of the root node.
     * @return string Concatenated serialised nodes, root first.
     */
    public function serialise_word_tree($start_position) {
        $words = array();
        foreach ($this->words as $word => $collection_index) {
            // Numeric words come back from the array as int keys.
            $words[] = [$this->str_to_bool_array((string) $word), $collection_index];
        }

        // Order by bit string. strcmp is used because "<=>" would treat the
        // all-digit bit strings as numbers and compare them with float rounding.
        usort($words, static function ($a, $b) {
            return strcmp($a[0][0], $b[0][0]);
        });

        $root = new WordBit();
        // Bit-string prefix => node, in creation order.
        $nodes = ["" => $root];

        foreach ($words as [[$bit_string, $bits], $collection_index]) {
            $node = $root;
            $bit_count = strlen($bit_string);
            for ($i = 0; $i < $bit_count; $i++) {
                $key = substr($bit_string, 0, $i + 1);
                if (array_key_exists($key, $nodes)) {
                    $node = $nodes[$key];
                    continue;
                }

                // $node is the node for the prefix of length $i, i.e. the parent.
                $child = new WordBit();
                if ($bits[$i]) {
                    $node->next_1 = $child;
                } else {
                    $node->next_0 = $child;
                }
                $nodes[$key] = $child;
                $node = $child;
            }

            $node->collection = $collection_index;
        }

        // Every position must be assigned before any node is serialised,
        // because a node writes out the positions of its children.
        $root->position = $start_position;
        $node_array = [$root];
        unset($nodes[""]);

        $counter = $start_position + WordBit::SIZE;
        foreach ($nodes as $node) {
            $node->position = $counter;
            $node_array[] = $node;
            $counter += WordBit::SIZE;
        }

        $output = "";
        foreach ($node_array as $node) {
            $output .= $node->serialise();
        }

        return $output;
    }

    /**
     * Expands a string into its bits, least significant bit of each byte first.
     *
     * @param string $string
     * @return array Two elements: the bits as a string of "0"/"1" characters,
     *               and the same bits as a list of booleans.
     */
    private function str_to_bool_array($string) {
        $string = (string) $string;
        $bits = array();
        $bit_string = "";

        $length = strlen($string);
        for ($i = 0; $i < $length; $i++) {
            $byte = ord($string[$i]);
            for ($shift = 0; $shift < 8; $shift++) {
                $bit = (($byte >> $shift) & 1) === 1;
                $bits[] = $bit;
                $bit_string .= $bit ? "1" : "0";
            }
        }

        return [$bit_string, $bits];
    }

    /**
     * Adds every word found in $word_soup to $array as a lower-cased key.
     *
     * Everything except alphanumerics and whitespace is stripped, and the
     * remainder is split on any run of whitespace.
     *
     * @param array $array     Word set collected so far (word => 1).
     * @param mixed $word_soup Text to scan; anything that is not a non-empty
     *                         string leaves $array unchanged.
     * @return array The word set including the newly found words.
     */
    private function word_array_collect($array, $word_soup) {
        if (!is_string($word_soup) || $word_soup === "") {
            // Hand the set back untouched so the caller keeps earlier words.
            return $array;
        }

        $stripped = preg_replace("/[^[:alnum:][:space:]]/u", '', $word_soup);
        if ($stripped === null) {
            // preg_replace fails on text that is not valid UTF-8.
            return $array;
        }

        $words = preg_split('/\s+/u', $stripped, -1, PREG_SPLIT_NO_EMPTY);
        if ($words === false) {
            return $array;
        }

        foreach ($words as $word) {
            $array[strtolower($word)] = 1;
        }

        return $array;
    }

}
|
|
|
+
|
|
|
/**
 * One node of the binary word tree written into a PPIX index.
 *
 * A node links to up to two children (one per bit value) and may carry the
 * index of a collection for the word that ends at this node.
 */
class WordBit {
    /** Serialised size of one node in bytes: V (4) + C (1) + V (4) + V (4). */
    public const SIZE = 13;

    /** @var WordBit|null Child followed when the next bit is 0. */
    public $next_0;

    /** @var WordBit|null Child followed when the next bit is 1. */
    public $next_1;

    /** @var int|null Collection index for the word ending here, or null for none. */
    public $collection;

    /** @var int|null Absolute position at which this node is written. */
    public $position;

    /**
     * Packs this node as: 32-bit position of the 0-child, 8-bit collection
     * flag (255 when a collection is set, otherwise 0), 32-bit collection
     * index, 32-bit position of the 1-child. Missing children are written
     * as position 0.
     *
     * @return string WordBit::SIZE bytes.
     */
    public function serialise() {
        $n0 = $this->next_0 === null ? 0 : (int) $this->next_0->position;
        $n1 = $this->next_1 === null ? 0 : (int) $this->next_1->position;

        // Test for null rather than 0: collection index 0 is a real
        // collection and must still be flagged as present.
        $has_col = $this->collection === null ? 0 : 255;
        $col = $this->collection === null ? 0 : (int) $this->collection;

        return pack("VCVV", $n0, $has_col, $col, $n1);
    }
}
|
|
|
+
|
|
|
+
|
|
|
/**
 * Rebuilds the library index from the collection file.
 *
 * Loads PUBLICATION_DIR . "/collection.ppcl", feeds its publications to a
 * PpixGenerator in reverse order, and writes the serialised index to
 * PUBLICATION_DIR . "/lib.ppix".
 *
 * @return void
 */
function generate_ppix_from_ppcl() {
    include_once("config.php");
    include_once("ppcl.php");

    $collection = new Ppcl();
    $collection->from_string(file_get_contents(PUBLICATION_DIR . "/collection.ppcl"));

    $generator = new PpixGenerator();
    // Publications are indexed in the reverse of their order in the collection.
    foreach (array_reverse($collection->publications) as $entry) {
        $publication_path = PUBLICATION_DIR . "/" . $entry->name;
        $generator->add_publication($publication_path);
    }

    $index_data = $generator->serialise();
    file_put_contents(PUBLICATION_DIR . "/lib.ppix", $index_data);
}
|
|
|
+
|
|
|
+?>
|