using Invercargill.DataStructures; using Invercargill.Expressions; namespace InvercargillSql.Orm.Projections { /** * Analysis result containing information about aggregate functions found in an expression. * * Returned by AggregateAnalyzer.analyze(), this class provides details about * which aggregate functions were detected and their locations in the expression. * * Example: * {{{ * var analysis = analyzer.analyze("COUNT(o.id) + SUM(o.total)"); * // analysis.contains_aggregate == true * // analysis.aggregate_functions_found contains "COUNT", "SUM" * }}} */ public class AggregateAnalysis : Object { /** * Indicates whether the expression contains any aggregate functions. */ public bool contains_aggregate { get; construct; } /** * The list of aggregate function names found in the expression. * * This may contain duplicates if the same function appears multiple times. * The names are in their original case as they appear in the expression. */ public Vector aggregate_functions_found { get; construct; } /** * The original expression that was analyzed. */ public string original_expression { get; construct; } /** * Creates a new AggregateAnalysis. * * @param contains_aggregate Whether aggregates were found * @param aggregate_functions_found List of aggregate function names found * @param original_expression The expression that was analyzed */ public AggregateAnalysis( bool contains_aggregate, Vector aggregate_functions_found, string original_expression ) { Object( contains_aggregate: contains_aggregate, aggregate_functions_found: aggregate_functions_found, original_expression: original_expression ); } } /** * Result of splitting an expression into aggregate and non-aggregate parts. * * When a compound expression contains both aggregate and non-aggregate conditions, * this class holds the separated parts for use in WHERE and HAVING clauses. * * Example: * {{{ * // Input: "user_id > 100 && order_count >= 5" * // Where order_count is an aggregate like COUNT(o.id) * split.non_aggregate_part == "user_id > 100" // Goes to WHERE * split.aggregate_part == "order_count >= 5" // Goes to HAVING * }}} */ public class SplitExpression : Object { /** * The non-aggregate part of the expression. * * This part should be applied to the WHERE clause. * May be null if the entire expression contains aggregates. */ public string? non_aggregate_part { get; construct; } /** * The aggregate part of the expression. * * This part should be applied to the HAVING clause. * May be null if the expression contains no aggregates. */ public string? aggregate_part { get; construct; } /** * Indicates if the expression combines aggregate and non-aggregate parts with OR. * * When true, a subquery wrapper is required because SQL cannot mix * WHERE and HAVING conditions with OR logic. * * Example requiring subquery: * {{{ * // "user_id > 100 || COUNT(o.id) >= 5" * // Cannot be split into separate WHERE and HAVING * split.needs_subquery == true * }}} */ public bool needs_subquery { get; construct; } /** * Creates a new SplitExpression. * * @param non_aggregate_part The WHERE clause part (or null) * @param aggregate_part The HAVING clause part (or null) * @param needs_subquery True if subquery wrapper is needed */ public SplitExpression( string? non_aggregate_part, string? aggregate_part, bool needs_subquery ) { Object( non_aggregate_part: non_aggregate_part, aggregate_part: aggregate_part, needs_subquery: needs_subquery ); } } /** * Analyzes expressions to detect aggregate functions for WHERE/HAVING split. * * SQL requires aggregate conditions to be in the HAVING clause, while * non-aggregate conditions go in the WHERE clause. This analyzer detects * aggregate functions and helps split compound expressions appropriately. * * Aggregate functions detected: * - COUNT - Counts rows or non-null values * - SUM - Sums numeric values * - AVG - Calculates average * - MIN - Finds minimum value * - MAX - Finds maximum value * - GROUP_CONCAT - Concatenates strings (SQLite specific) * * Example usage: * {{{ * var analyzer = new AggregateAnalyzer(); * * // Check for aggregates * if (analyzer.contains_aggregate("COUNT(o.id) > 5")) { * // Use in HAVING clause * } * * // Split mixed expression * var split = analyzer.split_expression("user_id > 100 && COUNT(o.id) >= 5"); * // split.non_aggregate_part == "user_id > 100" * // split.aggregate_part == "COUNT(o.id) >= 5" * }}} */ public class AggregateAnalyzer : Object { /** * Set of aggregate function names that trigger HAVING clause usage. * * These are stored in uppercase for case-insensitive matching. */ private static HashSet? _aggregate_functions = null; /** * Gets the set of aggregate function names. * * Lazily initializes the set on first access. * * @return The set of aggregate function names in uppercase */ private static HashSet get_aggregate_functions() { if (_aggregate_functions == null) { _aggregate_functions = new HashSet(); _aggregate_functions.add("COUNT"); _aggregate_functions.add("SUM"); _aggregate_functions.add("AVG"); _aggregate_functions.add("MIN"); _aggregate_functions.add("MAX"); _aggregate_functions.add("GROUP_CONCAT"); } return _aggregate_functions; } /** * Analyzes an expression string and returns information about aggregates. * * This method parses the expression and identifies any aggregate functions. * It returns an AggregateAnalysis object with details about what was found. * * @param expression The Invercargill expression string to analyze * @return An AggregateAnalysis with detection results */ public AggregateAnalysis analyze(string expression) { var found_functions = new Vector(); bool contains = scan_for_aggregates(expression, found_functions); return new AggregateAnalysis(contains, found_functions, expression); } /** * Checks if an expression contains any aggregate functions. * * This is a convenience method for quickly checking if HAVING clause * handling is needed. * * @param expression The Invercargill expression string to check * @return True if the expression contains aggregate functions */ public bool contains_aggregate(string expression) { return scan_for_aggregates(expression, null); } /** * Splits a compound expression into aggregate and non-aggregate parts. * * This method analyzes an expression and separates conditions that * should go in WHERE vs HAVING clauses. It handles AND-connected * conditions properly but flags OR-combined mixed conditions as * requiring a subquery wrapper. * * Example: * {{{ * // AND-connected (can split): * // Input: "user_id > 100 && COUNT(o.id) >= 5" * // Result: * // non_aggregate_part = "user_id > 100" * // aggregate_part = "COUNT(o.id) >= 5" * // needs_subquery = false * * // OR-connected mixed (needs subquery): * // Input: "user_id > 100 || COUNT(o.id) >= 5" * // Result: * // non_aggregate_part = null * // aggregate_part = null * // needs_subquery = true * }}} * * @param expression The compound expression to split * @return A SplitExpression with the separated parts */ public SplitExpression split_expression(string expression) { var trimmed = expression.strip(); if (trimmed.length == 0) { return new SplitExpression(null, null, false); } // Parse as expression tree if possible try { var parsed = ExpressionParser.parse(trimmed); return split_expression_tree(parsed); } catch (Error e) { // Fall back to simple scanning if parsing fails return split_expression_simple(trimmed); } } /** * Scans an expression string for aggregate function calls. * * This method uses simple pattern matching to find function calls * and check if they are aggregate functions. * * @param expression The expression to scan * @param found_functions Optional vector to collect found function names * @return True if any aggregate functions were found */ private bool scan_for_aggregates(string expression, Vector? found_functions) { var aggregates = get_aggregate_functions(); bool found = false; // Scan for function calls pattern: WORD( // We need to find word boundaries and check against known aggregates int i = 0; int len = expression.length; while (i < len) { // Skip whitespace while (i < len && expression[i].isspace()) { i++; } if (i >= len) break; // Check if we're at the start of an identifier if (expression[i].isalpha() || expression[i] == '_') { int start = i; // Read the identifier while (i < len && (expression[i].isalnum() || expression[i] == '_')) { i++; } string identifier = expression.substring(start, i - start).up(); // Skip whitespace before potential parenthesis int j = i; while (j < len && expression[j].isspace()) { j++; } // Check if this is a function call if (j < len && expression[j] == '(') { if (aggregates.contains(identifier)) { found = true; if (found_functions != null) { found_functions.add(identifier); } } } i = j; } else { i++; } } return found; } /** * Splits a parsed expression tree into aggregate and non-aggregate parts. * * This method handles the expression tree structure properly, correctly * identifying AND vs OR combinations and nested expressions. * * @param expr The parsed expression tree * @return A SplitExpression with the separated parts */ private SplitExpression split_expression_tree(Expression expr) { // Check if it's a binary expression (potential AND/OR) if (expr is BinaryExpression) { var binary = (BinaryExpression) expr; // Handle AND - can split if (binary.op == BinaryOperator.AND) { var left_split = split_expression_tree(binary.left); var right_split = split_expression_tree(binary.right); // Combine results return combine_and_splits(left_split, right_split); } // Handle OR - check if mixed if (binary.op == BinaryOperator.OR) { bool left_has_agg = expression_contains_aggregate(binary.left); bool right_has_agg = expression_contains_aggregate(binary.right); // If both sides are same type, no subquery needed if (left_has_agg == right_has_agg) { if (left_has_agg) { // Both aggregate - whole thing goes to HAVING return new SplitExpression(null, expr_to_string(expr), false); } else { // Both non-aggregate - whole thing goes to WHERE return new SplitExpression(expr_to_string(expr), null, false); } } // Mixed OR - needs subquery return new SplitExpression(null, null, true); } } // For non-binary or other operators, check if it contains aggregates bool has_aggregate = expression_contains_aggregate(expr); string expr_str = expr_to_string(expr); if (has_aggregate) { return new SplitExpression(null, expr_str, false); } else { return new SplitExpression(expr_str, null, false); } } /** * Combines two SplitExpressions from AND-connected expressions. * * @param left The split from the left side of AND * @param right The split from the right side of AND * @return A combined SplitExpression */ private SplitExpression combine_and_splits(SplitExpression left, SplitExpression right) { // If either needs subquery, propagate that if (left.needs_subquery || right.needs_subquery) { return new SplitExpression(null, null, true); } // Combine non-aggregate parts string? non_agg = null; if (left.non_aggregate_part != null && right.non_aggregate_part != null) { non_agg = @"($(left.non_aggregate_part) AND $(right.non_aggregate_part))"; } else if (left.non_aggregate_part != null) { non_agg = left.non_aggregate_part; } else if (right.non_aggregate_part != null) { non_agg = right.non_aggregate_part; } // Combine aggregate parts string? agg = null; if (left.aggregate_part != null && right.aggregate_part != null) { agg = @"($(left.aggregate_part) AND $(right.aggregate_part))"; } else if (left.aggregate_part != null) { agg = left.aggregate_part; } else if (right.aggregate_part != null) { agg = right.aggregate_part; } return new SplitExpression(non_agg, agg, false); } /** * Checks if an expression tree contains aggregate functions. * * @param expr The expression tree to check * @return True if aggregates are found */ private bool expression_contains_aggregate(Expression expr) { var visitor = new AggregateDetectionVisitor(); expr.accept(visitor); return visitor.found_aggregate; } /** * Converts an expression tree back to a string representation. * * @param expr The expression tree to convert * @return The string representation */ private string expr_to_string(Expression expr) { var visitor = new ExpressionStringVisitor(); expr.accept(visitor); return visitor.get_string(); } /** * Fallback method for splitting expressions when parsing fails. * * Uses simple scanning to detect aggregates and cannot properly * handle complex nested expressions. * * @param expression The expression to split * @return A SplitExpression based on simple scanning */ private SplitExpression split_expression_simple(string expression) { bool has_aggregate = contains_aggregate(expression); if (has_aggregate) { // Check for OR that might mix aggregate and non-aggregate // This is a simplified check - doesn't handle nested parentheses bool has_or = contains_mixed_or(expression); if (has_or) { return new SplitExpression(null, null, true); } return new SplitExpression(null, expression, false); } else { return new SplitExpression(expression, null, false); } } /** * Checks if an expression contains OR that might mix aggregate and non-aggregate. * * This is a simplified check used as a fallback when expression parsing fails. * * @param expression The expression to check * @return True if potentially mixed OR is found */ private bool contains_mixed_or(string expression) { // Look for OR keyword at the top level (not inside parentheses) int paren_depth = 0; int i = 0; int len = expression.length; while (i < len) { char c = expression[i]; if (c == '(') { paren_depth++; } else if (c == ')') { paren_depth--; } else if (paren_depth == 0) { // Check for OR keyword if (i + 2 < len) { string substr = expression.substring(i, 2).up(); if (substr == "OR" || (i + 3 < len && expression.substring(i, 3).up() == "OR ")) { // Found OR at top level - check if both sides have different aggregate status string left = expression.substring(0, i).strip(); string right = expression.substring(i + 2).strip(); // Remove leading "OR" if present if (right.has_prefix("OR") || right.has_prefix("or")) { right = right.substring(2).strip(); } bool left_has = contains_aggregate(left); bool right_has = contains_aggregate(right); if (left_has != right_has) { return true; } } } } i++; } return false; } } /** * Expression visitor that detects aggregate function calls. * * This visitor traverses an expression tree and sets found_aggregate to true * if any aggregate functions (COUNT, SUM, AVG, MIN, MAX, GROUP_CONCAT) are found. */ internal class AggregateDetectionVisitor : Object, ExpressionVisitor { /** * Indicates whether an aggregate function was found during traversal. */ public bool found_aggregate { get; private set; } private static HashSet _aggregate_functions = null; /** * Creates a new AggregateDetectionVisitor. */ public AggregateDetectionVisitor() { found_aggregate = false; } /** * Gets the set of aggregate function names. */ private static HashSet get_aggregate_functions() { if (_aggregate_functions == null) { _aggregate_functions = new HashSet(); _aggregate_functions.add("COUNT"); _aggregate_functions.add("SUM"); _aggregate_functions.add("AVG"); _aggregate_functions.add("MIN"); _aggregate_functions.add("MAX"); _aggregate_functions.add("GROUP_CONCAT"); } return _aggregate_functions; } public void visit_binary(BinaryExpression expr) { if (!found_aggregate) { expr.left.accept(this); } if (!found_aggregate) { expr.right.accept(this); } } public void visit_property(PropertyExpression expr) { // Properties don't contain aggregates } public void visit_literal(LiteralExpression expr) { // Literals don't contain aggregates } public void visit_unary(UnaryExpression expr) { if (!found_aggregate) { // The operand will be visited by UnaryExpression.accept() } } public void visit_ternary(TernaryExpression expr) { if (!found_aggregate) { expr.condition.accept(this); } if (!found_aggregate) { expr.true_expression.accept(this); } if (!found_aggregate) { expr.false_expression.accept(this); } } public void visit_lambda(LambdaExpression expr) { if (!found_aggregate) { expr.body.accept(this); } } public void visit_bracketed(BracketedExpression expr) { if (!found_aggregate) { expr.inner.accept(this); } } public void visit_variable(VariableExpression expr) { // Variables don't contain aggregates } public void visit_function_call(FunctionCallExpression expr) { // Method calls don't contain aggregates (they're not SQL functions) // But their arguments might if (!found_aggregate && expr.arguments != null) { foreach (var arg in expr.arguments) { if (found_aggregate) break; arg.accept(this); } } } public void visit_global_function_call(GlobalFunctionCallExpression expr) { var aggregates = get_aggregate_functions(); string func_name = expr.function_name.up(); if (aggregates.contains(func_name)) { found_aggregate = true; } // Also check arguments for nested aggregates if (!found_aggregate && expr.arguments != null) { foreach (var arg in expr.arguments) { if (found_aggregate) break; arg.accept(this); } } } public void visit_lot_literal(LotLiteralExpression expr) { // Collection literals don't contain aggregates } } /** * Expression visitor that converts an expression tree back to a string. * * This is used to reconstruct expression strings after splitting. */ internal class ExpressionStringVisitor : Object, ExpressionVisitor { private StringBuilder _builder; private string? _pending_property_name = null; // Property name waiting for target variable /** * Creates a new ExpressionStringVisitor. */ public ExpressionStringVisitor() { _builder = new StringBuilder(); _pending_property_name = null; } /** * Gets the string representation of the visited expression. * * @return The expression as a string */ public string get_string() { return _builder.str; } public void visit_binary(BinaryExpression expr) { _builder.append("("); expr.left.accept(this); _builder.append(get_operator_string(expr.op)); expr.right.accept(this); _builder.append(")"); } private string get_operator_string(BinaryOperator op) { switch (op) { case BinaryOperator.EQUAL: return " == "; case BinaryOperator.NOT_EQUAL: return " != "; case BinaryOperator.GREATER_THAN: return " > "; case BinaryOperator.GREATER_EQUAL: return " >= "; case BinaryOperator.LESS_THAN: return " < "; case BinaryOperator.LESS_EQUAL: return " <= "; case BinaryOperator.AND: return " && "; case BinaryOperator.OR: return " || "; case BinaryOperator.ADD: return " + "; case BinaryOperator.SUBTRACT: return " - "; case BinaryOperator.MULTIPLY: return " * "; case BinaryOperator.DIVIDE: return " / "; case BinaryOperator.MODULO: return " % "; default: return " ? "; } } public void visit_property(PropertyExpression expr) { // The library's PropertyExpression.accept() calls: // 1. visit_property() - our method here // 2. target.accept() - which calls visit_variable() // So visit_variable is called AFTER visit_property. // We save the property name and output it when visit_variable is called. _pending_property_name = expr.property_name; } public void visit_literal(LiteralExpression expr) { var value = expr.value; if (value.assignable_to_type(typeof(string))) { string? s = null; if (value.try_get_as(out s) && s != null) { _builder.append("\""); _builder.append(s); _builder.append("\""); } } else if (value.assignable_to_type(typeof(int64))) { int64? i = null; if (value.try_get_as(out i) && i != null) { _builder.append(i.to_string()); } } else if (value.assignable_to_type(typeof(double))) { double? d = null; if (value.try_get_as(out d) && d != null) { _builder.append(d.to_string()); } } else if (value.assignable_to_type(typeof(bool))) { bool? b = null; if (value.try_get_as(out b) && b != null) { _builder.append(b ? "true" : "false"); } } else if (value.is_null()) { _builder.append("null"); } else { _builder.append(value.to_string()); } } public void visit_unary(UnaryExpression expr) { if (expr.operator == UnaryOperator.NOT) { _builder.append("!"); } else if (expr.operator == UnaryOperator.NEGATE) { _builder.append("-"); } } public void visit_ternary(TernaryExpression expr) { expr.condition.accept(this); _builder.append(" ? "); expr.true_expression.accept(this); _builder.append(" : "); expr.false_expression.accept(this); } public void visit_lambda(LambdaExpression expr) { _builder.append(expr.parameter_name); _builder.append(" => "); expr.body.accept(this); } public void visit_bracketed(BracketedExpression expr) { _builder.append("("); expr.inner.accept(this); _builder.append(")"); } public void visit_variable(VariableExpression expr) { // If there's a pending property name, this variable is the target of a property expression // Output: variable.property_name if (_pending_property_name != null) { _builder.append(expr.variable_name); _builder.append("."); _builder.append(_pending_property_name); _pending_property_name = null; // Clear it } else { _builder.append(expr.variable_name); } } public void visit_function_call(FunctionCallExpression expr) { expr.target.accept(this); _builder.append("."); _builder.append(expr.function_name); _builder.append("("); if (expr.arguments != null) { bool first = true; foreach (var arg in expr.arguments) { if (!first) _builder.append(", "); arg.accept(this); first = false; } } _builder.append(")"); } public void visit_global_function_call(GlobalFunctionCallExpression expr) { _builder.append(expr.function_name); _builder.append("("); if (expr.arguments != null) { bool first = true; foreach (var arg in expr.arguments) { if (!first) _builder.append(", "); arg.accept(this); first = false; } } _builder.append(")"); } public void visit_lot_literal(LotLiteralExpression expr) { _builder.append("["); bool first = true; foreach (var element in expr.elements) { if (!first) _builder.append(", "); element.accept(this); first = false; } _builder.append("]"); } } }