08-Set-Operations.md 12 KB

Set Operations

This document describes the API for set operations over entity children sets.

Overview

Implexus provides set operations over entity children, following the Invercargill.Set interface pattern. Operations include union, intersection, difference, and symmetric difference.

EntitySet Class

The EntitySet class wraps an entity and provides set operations over its children.

namespace Implexus.Entities {

public class EntitySet : Object, Invercargill.ReadOnlySet<Entity> {
    
    private Entity _entity;
    private Invercargill.DataStructures.HashSet<Entity>? _cached_children;
    
    public EntitySet(Entity entity) {
        _entity = entity;
        _cached_children = null;
    }
    
    // Lazy-load and cache children
    private Invercargill.DataStructures.HashSet<Entity> get_children() {
        if (_cached_children == null) {
            _cached_children = new Invercargill.DataStructures.HashSet<Entity>();
            foreach (var child in _entity.get_children()) {
                _cached_children.add(child);
            }
        }
        return (!) _cached_children;
    }
    
    // ReadOnlySet implementation
    public int count { get { return (int) get_children().count; } }
    
    public bool is_empty { get { return count == 0; } }
    
    public bool has(Entity item) {
        return get_children().has(item);
    }
    
    public bool has_all(Invercargill.ReadOnlyCollection<Entity> items) {
        foreach (var item in items) {
            if (!has(item)) return false;
        }
        return true;
    }
    
    public bool has_any(Invercargill.ReadOnlyCollection<Entity> items) {
        foreach (var item in items) {
            if (has(item)) return true;
        }
        return false;
    }
    
    public Entity? find(Entity item) {
        if (has(item)) return item;
        return null;
    }
    
    public bool try_find(Entity item, out Entity result) {
        result = item;
        return has(item);
    }
    
    public bool equals(Invercargill.ReadOnlySet<Entity> other) {
        if (count != other.count) return false;
        foreach (var item in get_children()) {
            if (!other.has(item)) return false;
        }
        return true;
    }
    
    public bool is_subset_of(Invercargill.ReadOnlySet<Entity> other) {
        foreach (var item in get_children()) {
            if (!other.has(item)) return false;
        }
        return true;
    }
    
    public bool is_superset_of(Invercargill.ReadOnlySet<Entity> other) {
        return other.is_subset_of(this);
    }
    
    public bool is_proper_subset_of(Invercargill.ReadOnlySet<Entity> other) {
        return count < other.count && is_subset_of(other);
    }
    
    public bool is_proper_superset_of(Invercargill.ReadOnlySet<Entity> other) {
        return count > other.count && is_superset_of(other);
    }
    
    public bool overlaps(Invercargill.ReadOnlySet<Entity> other) {
        foreach (var item in get_children()) {
            if (other.has(item)) return true;
        }
        return false;
    }
    
    public Invercargill.Enumerable<Entity> as_enumerable() {
        return get_children().as_enumerable();
    }
    
    public Invercargill.Lot<Entity> as_lot() {
        return get_children().as_lot();
    }
    
    // Set operations - return new EntitySet
    public EntitySet union(EntitySet other) {
        var result = new Invercargill.DataStructures.HashSet<Entity>();
        foreach (var item in get_children()) {
            result.add(item);
        }
        foreach (var item in other.get_children()) {
            result.add(item);
        }
        return new EntitySet.from_set(result);
    }
    
    public EntitySet intersect(EntitySet other) {
        var result = new Invercargill.DataStructures.HashSet<Entity>();
        foreach (var item in get_children()) {
            if (other.has(item)) {
                result.add(item);
            }
        }
        return new EntitySet.from_set(result);
    }
    
    public EntitySet except(EntitySet other) {
        var result = new Invercargill.DataStructures.HashSet<Entity>();
        foreach (var item in get_children()) {
            if (!other.has(item)) {
                result.add(item);
            }
        }
        return new EntitySet.from_set(result);
    }
    
    public EntitySet symmetric_except(EntitySet other) {
        var result = new Invercargill.DataStructures.HashSet<Entity>();
        foreach (var item in get_children()) {
            if (!other.has(item)) {
                result.add(item);
            }
        }
        foreach (var item in other.get_children()) {
            if (!has(item)) {
                result.add(item);
            }
        }
        return new EntitySet.from_set(result);
    }
    
    // Factory from pre-built set
    public EntitySet.from_set(Invercargill.DataStructures.HashSet<Entity> set) {
        _entity = null;
        _cached_children = set;
    }
}

} // namespace Implexus.Entities

Set Operation Methods

The Entity interface provides convenient access to set operations:

public interface Entity : Object {
    // ...
    
    // Get this entity's children as a set
    public abstract EntitySet as_set();
    
    // Direct set operations
    public EntitySet union_with(Entity other) {
        return as_set().union(other.as_set());
    }
    
    public EntitySet intersect_with(Entity other) {
        return as_set().intersect(other.as_set());
    }
    
    public EntitySet except_with(Entity other) {
        return as_set().except(other.as_set());
    }
    
    public EntitySet symmetric_except_with(Entity other) {
        return as_set().symmetric_except(other.as_set());
    }
}

Set Operation Diagram

graph LR
    subgraph Set A - Container A children
        A1[Doc 1]
        A2[Doc 2]
        A3[Doc 3]
    end
    
    subgraph Set B - Container B children
        B2[Doc 2]
        B3[Doc 3]
        B4[Doc 4]
    end
    
    subgraph Union
        U1[Doc 1]
        U2[Doc 2]
        U3[Doc 3]
        U4[Doc 4]
    end
    
    subgraph Intersection
        I2[Doc 2]
        I3[Doc 3]
    end
    
    subgraph A except B
        E1[Doc 1]
    end
    
    subgraph Symmetric Diff
        S1[Doc 1]
        S4[Doc 4]
    end

Usage Examples

Basic Set Operations

// Create categories with some overlapping children
var container_a = engine.get_root().create_container("set_a");
container_a.create_document("doc1", "Item");
container_a.create_document("doc2", "Item");
container_a.create_document("doc3", "Item");

var container_b = engine.get_root().create_container("set_b");
container_b.create_document("doc2", "Item");  // Overlap
container_b.create_document("doc3", "Item");  // Overlap
container_b.create_document("doc4", "Item");

// Union - all documents from both categories
var union = container_a.union_with(container_b);
print("Union count: %d\n", union.count);  // 4

// Intersection - documents in both categories
var intersection = container_a.intersect_with(container_b);
print("Intersection count: %d\n", intersection.count);  // 2

// Difference - documents in A but not in B
var difference = container_a.except_with(container_b);
print("Difference count: %d\n", difference.count);  // 1 (doc1)

// Symmetric difference - documents in A or B but not both
var sym_diff = container_a.symmetric_except_with(container_b);
print("Symmetric diff count: %d\n", sym_diff.count);  // 2 (doc1, doc4)

Combining with Categorys

// Create tasks with different statuses
var tasks = engine.get_root().create_container("tasks");
var task1 = tasks.create_document("task1", "Task");
task1.set_property("status", new ValueElement("open"));
task1.set_property("priority", new ValueElement("high"));

var task2 = tasks.create_document("task2", "Task");
task2.set_property("status", new ValueElement("open"));
task2.set_property("priority", new ValueElement("low"));

var task3 = tasks.create_document("task3", "Task");
task3.set_property("status", new ValueElement("closed"));
task3.set_property("priority", new ValueElement("high"));

// Create categorys for different groupings
var by_status = tasks.create_category("by_status", "Task", "status");
var by_priority = tasks.create_category("by_priority", "Task", "priority");

// Get high priority open tasks using intersection
var open_tasks = by_status.get_child("open");
var high_priority = by_priority.get_child("high");
var high_priority_open = open_tasks.intersect_with(high_priority);

print("High priority open tasks: %d\n", high_priority_open.count);  // 1 (task1)

Set Membership Tests

var container = engine.get_root().create_container("test");
var doc1 = container.create_document("doc1", "Item");
var doc2 = container.create_document("doc2", "Item");

var set = container.as_set();

// Check membership
assert(set.has(doc1));
assert(set.has(doc2));

// Check multiple
var check = new Invercargill.DataStructures.Vector<Entity>();
check.add(doc1);
check.add(doc2);
assert(set.has_all(check));

// Subset tests
var other_container = engine.get_root().create_container("other");
other_container.create_document("doc1", "Item");

assert(set.is_superset_of(other_container.as_set()));
assert(other_container.as_set().is_subset_of(set));

Chaining Operations

// Multiple set operations can be chained
var result = container_a
    .union_with(container_b)
    .intersect_with(container_c)
    .except_with(container_d);

// Iterate results
foreach (var entity in result.as_enumerable()) {
    print("Result: %s\n", entity.name);
}

Set Operations with Categorys

Categorys are particularly useful with set operations:

// Find tasks that are both high priority AND assigned to a specific user
var by_priority = tasks.create_category("by_priority", "Task", "priority");
var by_assignee = tasks.create_category("by_assignee", "Task", "assignee");

var high_priority = by_priority.get_child("high");
var assigned_john = by_assignee.get_child("john");

var johns_high_priority = high_priority.intersect_with(assigned_john);

// Find tasks that are high priority OR urgent
var urgent = by_priority.get_child("urgent");
var high_or_urgent = high_priority.union_with(urgent);

// Find tasks that are high priority but NOT assigned to john
var high_not_john = high_priority.except_with(assigned_john);

Performance Considerations

Caching

EntitySet caches children on first access:

public class EntitySet {
    private Invercargill.DataStructures.HashSet<Entity>? _cached_children;
    
    // Children loaded once, then cached
    private Invercargill.DataStructures.HashSet<Entity> get_children() {
        if (_cached_children == null) {
            _cached_children = new Invercargill.DataStructures.HashSet<Entity>();
            foreach (var child in _entity.get_children()) {
                _cached_children.add(child);
            }
        }
        return (!) _cached_children;
    }
}

Large Sets

For large result sets, use Enumerable directly instead of materializing:

// Instead of:
var set = container.as_set();  // Materializes all children

// Use streaming:
var filtered = container.get_children()
    .where(entity => entity.type_label == "Task")
    .select(entity => entity as Document);

Set Operation Summary

Operation Symbol Result
Union A ∪ B All elements in A or B
Intersection A ∩ B Elements in both A and B
Difference A \ B Elements in A but not B
Symmetric Difference A △ B Elements in A or B but not both
Method Returns
union_with(other) EntitySet
intersect_with(other) EntitySet
except_with(other) EntitySet
symmetric_except_with(other) EntitySet
as_set() EntitySet
is_subset_of(other) bool
is_superset_of(other) bool
overlaps(other) bool