Преглед изворни кода

Implement "rebuild index" message, and implement PHP PPIX indexer

Billy Barrow пре 1 година
родитељ
комит
0b29152233
6 измењених фајлова са 336 додато и 9 уклоњено
  1. 1 1
      config.php
  2. 15 4
      index.php
  3. 20 4
      ppcl.php
  4. 271 0
      ppix-gen.php
  5. 3 0
      ppix.php
  6. 26 0
      pprf.php

+ 1 - 1
config.php

@@ -8,7 +8,7 @@ define("SITE_LANGUAGE", "en-nz");
 define("PUBLICATION_DIR", "ppubs");
 define("PUBLICATION_NAME", "Post");
 define("DATE_FORMAT", "l d F Y, H:i");
-define("USE_PPIX", false);
+define("USE_PPIX", true);
 define("USE_PPCL", true);
 define("ENABLE_PPRF", true);
 define("ENABLE_PPRF_VCDIFF", true);

+ 15 - 4
index.php

@@ -7,6 +7,13 @@ define("INDEX_TYPE_MAIN", 0);
 define("INDEX_TYPE_TAG", 1);
 define("INDEX_TYPE_SEARCH", 2);
 
+$ppcl = null;
+if(USE_PPCL) {
+    include_once("ppcl.php");
+    $ppcl = new Ppcl();
+    $ppcl->from_string(file_get_contents(PUBLICATION_DIR . "/collection.ppcl"));
+}
+
 function get_ppub_file_list() {
     if(USE_PPIX) {
         include_once("ppix.php");
@@ -43,9 +50,6 @@ function get_ppub_file_list() {
         }
     }
     else if(USE_PPCL) {
-        include_once("ppcl.php");
-        $ppcl = new Ppcl();
-        $ppcl->from_string(file_get_contents(PUBLICATION_DIR . "/collection.ppcl"));
         $list = array();
         foreach($ppcl->publications as $pub) {
             array_push($list, $pub->name);
@@ -125,6 +129,7 @@ if($file == "" or $file == "/" or $file == "feed.rss") {
     $list = get_ppub_file_list();
     
     foreach ($list as $file) {
+        error_log("File: " . $file);
         $ppub = new Ppub();
         $ppub->read_file(PUBLICATION_DIR . "/".$file);
         $listing($ppub, $file);
@@ -141,7 +146,13 @@ if($file == "" or $file == "/" or $file == "feed.rss") {
 $file = str_replace("/", "", $file);
 $file_name = $file;
 $file = PUBLICATION_DIR . "/" . $file;
-if(!file_exists($file)){
+$ppcl_pub = null;
+if(USE_PPCL) {
+    $ppcl_pub = $ppcl->get_publication($file_name);
+}
+
+// Don't serve non-published files if using PPCL
+if(!file_exists($file) || (USE_PPCL && $ppcl_pub == null)){
     header('HTTP/1.1 404 Not Found');
     include("404.php");
     exit();

+ 20 - 4
ppcl.php

@@ -43,7 +43,7 @@ class Ppcl {
                 array_push($this->domains, $entry[1]);
             }
             else if($entry[0] == "MEM" && $authoritative) {
-                array_push($this->members, new CollectionMember($entry[1], base64_decode($entry[2]), base64_decode($entry[3]), base64_decode($entry[4])));
+                array_push($this->members, new CollectionMember($entry[1], $entry[2], base64_decode($entry[3])));
             }
             else if($entry[0] == "AGT" && $authoritative) {
                 array_push($this->agents, new CollectionAgent($entry[1], base64_decode($entry[2]), base64_decode($entry[3])));
@@ -115,6 +115,15 @@ class Ppcl {
         $str .= "\nSSG " . base64_encode($shared_signature);
         return $str;
     }
+
+    /**
+     * Find the publication entry of this collection with the given name.
+     *
+     * @param mixed $name Name to look for; compared loosely (==) against each entry's name.
+     * @return mixed The first matching entry of $this->publications, or null when none matches.
+     */
+    public function get_publication($name) {
+        $match = null;
+        foreach($this->publications as $candidate) {
+            if($candidate->name == $name) {
+                // First hit wins; stop scanning.
+                $match = $candidate;
+                break;
+            }
+        }
+        return $match;
+    }
 }
 
 class CollectionMember {
@@ -123,11 +132,18 @@ class CollectionMember {
     public $sealing_public_key;
     public $collection_secret;
     
-    public function __construct($name, $sign_key, $seal_key, $secret) {
+    public function __construct($name, $keys, $secret) {
         $this->name = $name;
-        $this->signing_public_key = $sign_key;
-        $this->sealing_public_key = $seal_key;
         $this->collection_secret = $secret;
+        
+        $key_parts = explode(":", $keys);
+        if($key_parts[0] != "CLMPK") {
+            error_log($keys);
+            throw new Exception("Invalid member public key");
+        }
+        $key_data = base64_decode($key_parts[1]);
+        $this->signing_public_key = substr($key_data, 0, SODIUM_CRYPTO_SIGN_PUBLICKEYBYTES);
+        $this->sealing_public_key = substr($key_data, SODIUM_CRYPTO_SIGN_PUBLICKEYBYTES, SODIUM_CRYPTO_BOX_PUBLICKEYBYTES);
     }
 }
 

+ 271 - 0
ppix-gen.php

@@ -0,0 +1,271 @@
+<?php
+include_once("ppub.php");
+
+/**
+ * Builds the contents of a PPIX index file from a set of publications.
+ *
+ * Publications are added one at a time with add_publication(); serialise()
+ * then produces the binary index as a string.
+ */
+class PpixGenerator {
+
+    // Map of lower-cased word => index into $collections.
+    private $words = array();
+    // List of collections; each collection is a list of publication ids (indexes into $pubs).
+    private $collections = array();
+    // Map of tag => index into $collections.
+    private $tags = array();
+    // List of [name, Ppub] pairs, in the order they were added.
+    private $pubs = array();
+
+    /**
+     * Read a publication and register its tags and words in the index.
+     *
+     * @param string $path Path of the publication file. Everything after the
+     *                     first "/" is stored as the publication's name.
+     */
+    public function add_publication($path) {
+        $ppub = new Ppub();
+        $ppub->read_file($path);
+
+        $index = count($this->pubs);
+        array_push($this->pubs, [explode("/", $path, 2)[1], $ppub]);
+
+        // Metadata fields are optional; a missing key is treated like an empty field.
+        $tags = $ppub->metadata["tags"] ?? null;
+        if($tags != null) {
+            foreach(explode(" ", $tags) as $tag) {
+                if($tag === "") {
+                    // Consecutive spaces produce empty pieces; those are not tags.
+                    continue;
+                }
+                if(array_key_exists($tag, $this->tags)) {
+                    // Known tag: append to its existing collection.
+                    array_push($this->collections[$this->tags[$tag]], $index);
+                }
+                else {
+                    $collection_index = count($this->collections);
+                    array_push($this->collections, [$index,]);
+                    $this->tags[$tag] = $collection_index;
+                }
+            }
+        }
+
+        $words = array();
+
+        $words = $this->word_array_collect($words, $ppub->metadata["title"] ?? null);
+        $words = $this->word_array_collect($words, $ppub->metadata["description"] ?? null);
+        $words = $this->word_array_collect($words, $ppub->metadata["author"] ?? null);
+
+        // NOTE(review): asset_list[1] is used as the default asset; confirm the index against Ppub.
+        $default_asset = $ppub->asset_list[1];
+        error_log("Default asset: " . $default_asset->mimetype);
+        if(str_starts_with($default_asset->mimetype, "text/")) {
+            $words = $this->word_array_collect($words, $ppub->read_asset($default_asset));
+        }
+
+        if($default_asset->mimetype == "application/x-ppvm") {
+            include_once("ppvm.php");
+            $ppvm = new Ppvm();
+            $ppvm->from_string($ppub->read_asset($default_asset));
+            $description = $ppub->read_asset($ppub->asset_index[$ppvm->metadata["description"]]);
+            $words = $this->word_array_collect($words, $description);
+        }
+
+        foreach($words as $word => $_) {
+            if(array_key_exists($word, $this->words)) {
+                array_push($this->collections[$this->words[$word]], $index);
+            }
+            else {
+                $collection_index = count($this->collections);
+                array_push($this->collections, [$index,]);
+                $this->words[$word] = $collection_index;
+            }
+        }
+    }
+
+    /**
+     * Serialise the whole index.
+     *
+     * Layout: the 5-byte magic "PPIX\x00", four 32-bit little-endian offsets
+     * (publication index, collection index, tag index, word tree), then the
+     * four sections in that order.
+     *
+     * @return string Binary index data.
+     */
+    public function serialise() {
+        $str = "PPIX\x00";
+        // 5 bytes of magic + 4 offsets of 4 bytes each.
+        $start = 21;
+
+        $publication_index_start = $start;
+        $publication_index = $this->serialise_publication_index($start);
+        $start += strlen($publication_index);
+
+        $collection_index_start = $start;
+        $collection_index = $this->serialise_collections($start);
+        $start += strlen($collection_index);
+
+        $tag_index_start = $start;
+        $tag_index = $this->serialise_tags();
+        $start += strlen($tag_index);
+
+        // $start now points at the first byte after the tag index: the word tree root.
+        $str .= pack("VVVV", $publication_index_start, $collection_index_start, $tag_index_start, $start);
+        $str .= $publication_index;
+        $str .= $collection_index;
+        $str .= $tag_index;
+
+        $str .= $this->serialise_word_tree($start);
+        return $str;
+    }
+
+    /**
+     * Serialise the publication index: a 32-bit count, one 6-byte entry per
+     * publication (32-bit absolute offset of the name, 16-bit name length),
+     * followed by the concatenated names.
+     *
+     * @param int $start_pos Absolute offset at which this section will be placed.
+     * @return string
+     */
+    private function serialise_publication_index($start_pos) {
+        $data = pack("V", count($this->pubs));
+        $string_data_start = $start_pos + 4 + (count($this->pubs) * 6);
+        $string_data = "";
+
+        foreach($this->pubs as $pub) {
+            $name = $pub[0];
+            $data .= pack("Vv", $string_data_start + strlen($string_data), strlen($name));
+            $string_data .= $name;
+        }
+
+        return $data . $string_data;
+    }
+
+    /**
+     * Serialise the collections: one 6-byte entry per collection (32-bit
+     * absolute offset of its data, 16-bit item count), followed by the
+     * collection data as 32-bit publication ids.
+     *
+     * @param int $start_pos Absolute offset at which this section will be placed.
+     * @return string
+     */
+    private function serialise_collections($start_pos) {
+        $index_data = "";
+        $collection_data_start = $start_pos + (count($this->collections) * 6);
+        $collection_data = "";
+
+        foreach($this->collections as $dup_col) {
+            // The same publication may have been pushed more than once.
+            $col = array_unique($dup_col);
+            $index_data .= pack("Vv", $collection_data_start + strlen($collection_data), count($col));
+            foreach($col as $pub_id) {
+                $collection_data .= pack("V", $pub_id);
+            }
+        }
+
+        return $index_data . $collection_data;
+    }
+
+    /**
+     * Serialise the tag index: a 16-bit count, then per tag an 8-bit name
+     * length, a 32-bit collection id and the tag name itself.
+     *
+     * @return string
+     */
+    private function serialise_tags() {
+        $data = pack("v", count($this->tags));
+
+        foreach($this->tags as $tag => $col_id) {
+            $data .= pack("CV", strlen($tag), $col_id);
+            $data .= $tag;
+        }
+
+        return $data;
+    }
+
+    /**
+     * Serialise the word lookup tree: a binary tree with one node per bit
+     * prefix of every indexed word. The node reached by a word's full bit
+     * sequence carries that word's collection id.
+     *
+     * @param int $start_position Absolute offset at which the root node will be placed.
+     * @return string Concatenated WordBit records, root first.
+     */
+    function serialise_word_tree($start_position) {
+        $words = array();
+        foreach ($this->words as $k => $v) {
+            // Purely numeric words come back from the array as integer keys.
+            array_push($words, [$this->str_to_bool_array((string)$k), $v]);
+        }
+
+        // The order only affects where nodes are laid out, not the shape of the tree.
+        usort($words, function($a, $b) {
+            return $a[0][0] <=> $b[0][0];
+        });
+
+        $root = new WordBit();
+        // Nodes keyed by the bit-string prefix that leads to them.
+        $nodes = ["" => $root];
+
+        foreach ($words as $word) {
+            $last_bit = null;
+            for ($i = 0; $i < strlen($word[0][0]); $i++) {
+                $key = substr($word[0][0], 0, $i + 1);
+                if (array_key_exists($key, $nodes)) {
+                    $last_bit = $nodes[$key];
+                    continue;
+                }
+
+                // First time this prefix is seen: create the node and hang it off its parent.
+                $last_bit = new WordBit();
+                $past_key = substr($word[0][0], 0, $i);
+                if ($word[0][1][$i]) {
+                    $nodes[$past_key]->next_1 = $last_bit;
+                } else {
+                    $nodes[$past_key]->next_0 = $last_bit;
+                }
+                $nodes[$key] = $last_bit;
+            }
+
+            $last_bit->collection = $word[1];
+        }
+
+        $root->position = $start_position;
+        $node_array = [$root];
+        unset($nodes[""]);
+
+        // Assign consecutive fixed-size slots after the root, in creation order.
+        $counter = $root->position + WordBit::SIZE;
+        foreach ($nodes as $node) {
+            $node->position = $counter;
+            array_push($node_array, $node);
+            $counter += WordBit::SIZE;
+        }
+
+        $output = "";
+        foreach ($node_array as $node) {
+            $output .= $node->serialise();
+        }
+        return $output;
+    }
+
+    /**
+     * Convert a string to its bit sequence, least significant bit of each
+     * byte first.
+     *
+     * @param string $string
+     * @return array Two elements: the bits as a string of "0"/"1" characters,
+     *               and the same bits as a list of booleans.
+     */
+    private function str_to_bool_array($string) {
+        $array = array();
+        foreach(str_split($string) as $char) {
+            foreach([1,2,4,8,16,32,64,128] as $i) {
+                array_push($array, (ord($char) & $i) == $i);
+            }
+        }
+        $string = "";
+        foreach($array as $bit) {
+            $string .= $bit ? 1 : 0;
+        }
+
+        return [$string, $array];
+    }
+
+    /**
+     * Add every word found in a piece of text to a word set.
+     *
+     * Characters that are neither alphanumeric nor whitespace are removed,
+     * the rest is split on runs of whitespace and lower-cased.
+     *
+     * @param array $array Word set so far (word => 1).
+     * @param string|null $word_soup Text to harvest words from; may be null or empty.
+     * @return array The word set including any newly found words.
+     */
+    private function word_array_collect($array, $word_soup) {
+        if($word_soup == null) {
+            // Nothing to add; the words collected so far must be handed back intact.
+            return $array;
+        }
+
+        $stripped = preg_replace("/[^[:alnum:][:space:]]/u", '', $word_soup);
+        if($stripped === null) {
+            // preg_replace() reports failure with null.
+            return $array;
+        }
+
+        // Split on any whitespace run, not just single spaces, so words on
+        // separate lines are not glued together.
+        $words = preg_split("/[[:space:]]+/u", $stripped, -1, PREG_SPLIT_NO_EMPTY);
+        if($words === false) {
+            return $array;
+        }
+
+        foreach($words as $word) {
+            $array[strtolower($word)] = 1;
+        }
+
+        return $array;
+    }
+
+}
+
+/**
+ * One node of the serialised word tree.
+ */
+class WordBit {
+    // Serialised size in bytes: 4 (next_0) + 1 (flag) + 4 (collection) + 4 (next_1).
+    public const SIZE = 13;
+    // Child node followed for a 0 bit, or null.
+    public $next_0;
+    // Child node followed for a 1 bit, or null.
+    public $next_1;
+    // Collection id attached to this node, or null when no word ends here.
+    public $collection;
+    // Absolute offset of this node in the output.
+    public $position;
+
+    /**
+     * Pack this node into its SIZE-byte record.
+     *
+     * @return string next_0 offset (32-bit LE), has-collection flag (0 or 255),
+     *                collection id (32-bit LE), next_1 offset (32-bit LE).
+     *                Absent children and an absent collection are written as 0.
+     */
+    public function serialise() {
+        $n0 = $this->next_0 === null ? 0 : $this->next_0->position;
+        $n1 = $this->next_1 === null ? 0 : $this->next_1->position;
+        // Collection id 0 is a real collection, so presence must be decided by
+        // null-ness and not by the numeric value.
+        $has_col = $this->collection === null ? 0 : 255;
+        $col = $this->collection === null ? 0 : $this->collection;
+        return pack("VCVV", $n0, $has_col, $col, $n1);
+    }
+}
+
+
+/**
+ * Rebuild PUBLICATION_DIR/lib.ppix from the publications listed in
+ * PUBLICATION_DIR/collection.ppcl.
+ *
+ * Publications are indexed in the reverse of the order in which the
+ * collection lists them.
+ *
+ * @throws Exception When the collection file cannot be read or the index
+ *                   file cannot be written.
+ */
+function generate_ppix_from_ppcl() {
+    include_once("config.php");
+    include_once("ppcl.php");
+
+    $collection_path = PUBLICATION_DIR . "/collection.ppcl";
+    $collection_data = file_get_contents($collection_path);
+    if($collection_data === false) {
+        throw new Exception("Could not read " . $collection_path);
+    }
+
+    $ppcl = new Ppcl();
+    $ppcl->from_string($collection_data);
+
+    $pubs = array_reverse($ppcl->publications);
+    $ppix = new PpixGenerator();
+    foreach($pubs as $pub) {
+        $ppix->add_publication(PUBLICATION_DIR . "/" . $pub->name);
+    }
+
+    $index_path = PUBLICATION_DIR . "/lib.ppix";
+    // file_put_contents() returns false on failure; surface that instead of
+    // letting the caller report a successful rebuild.
+    if(file_put_contents($index_path, $ppix->serialise()) === false) {
+        throw new Exception("Could not write " . $index_path);
+    }
+}
+
+?>

+ 3 - 0
ppix.php

@@ -22,9 +22,12 @@ class Ppix {
     }
 
     public function get_publication_by_id($id) {
+        error_log("Get by id " . $id);
         $location = $this->locations["pub"] + 4 + ($id * 6);
+        error_log("Location " . $location);
         fseek($this->handle, $location);
         $string_info = unpack("Vloc/vlen", fread($this->handle, 6));
+        error_log("String loc/len " . $string_info["loc"] . "/" . $string_info["len"]);
         fseek($this->handle, $string_info["loc"]);
         return fread($this->handle, $string_info["len"]);
     }

+ 26 - 0
pprf.php

@@ -360,6 +360,32 @@ if($message_info["type"] == 37) {
     }
 }
 
+// Rebuild index message (type 39): after the sender proves membership by
+// signing "PPIX\xFF" followed by the collection's current state token, the
+// PPIX index is regenerated from the collection.
+// NOTE(review): this relies on send_failure() ending the request; confirm in its definition.
+if($message_info["type"] == 39) {
+    $ppcl = get_ppcl();
+    verify_collection_message($handle, $ppcl);
+    $auth = read_authenticated_message($handle, $ppcl);
+
+    // sodium_crypto_sign_open() returns false when the signature does not verify.
+    $signature = sodium_crypto_sign_open($auth["authentication"], $auth["member"]->signing_public_key);
+    if($signature === false) {
+        send_failure(5, "Could not verify member signature");
+    }
+
+    // Strict comparison: the signed payload must match the expected token exactly.
+    $expected_auth = "PPIX\xFF" . $ppcl->current_state_token;
+    if($expected_auth !== $signature) {
+        send_failure(5, "Invalid authorisation token");
+    }
+
+    try {
+        include_once("ppix-gen.php");
+        generate_ppix_from_ppcl();
+    }
+    catch(Exception $e) {
+        send_failure(12, $e->getMessage());
+    }
+    send_confirmation();
+}
+
 
 // // Get identity message
 // if($message_info["type"] == 41) {