| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535 |
- using Invercargill.DataStructures;
- namespace Invercargill.Expressions {
- /**
- * Parser for expression strings.
- *
- * Converts expression strings into Expression trees using recursive descent parsing.
- * Supports:
- * - Literals: integers, floats, strings (single/double quotes), booleans, null
- * - Variables: identifiers
- * - Operators: +, -, *, /, %, ==, !=, <, >, <=, >=, &&, ||, !
- * - Ternary: condition ? true_expr : false_expr
- * - Property access: obj.property
- * - Function calls: obj.method(args...)
- * - Lambdas: x => expression
- * - Parentheses: (expression)
- *
- * Operator precedence (lowest to highest):
- * 1. Ternary: ? :
- * 2. Or: ||
- * 3. And: &&
- * 4. Equality: ==, !=
- * 5. Comparison: <, >, <=, >=
- * 6. Additive: +, -
- * 7. Multiplicative: *, /, %
- * 8. Unary: !, -
- * 9. Postfix: .property, .method(args)
- * 10. Primary: literals, variables, global function calls name(args), lambdas, lot literals [a, b], $N parameters, parentheses
- */
- public class ExpressionParser : Object {
// Token stream being parsed, held as an array for indexed access.
private Token[] _tokens;
// Index into _tokens of the next token to be consumed.
private int _position;
// Values bound to the positional placeholders $0, $1, ...
private Element[] _params;

/**
 * Builds a parser over an already tokenized expression.
 *
 * The cursor starts at the first token and no positional parameters are bound.
 *
 * @param tokens The tokens to parse
 */
public ExpressionParser(Series<Token> tokens) {
    this._position = 0;
    this._params = new Element[0];
    this._tokens = tokens.to_array();
}
/**
 * Tokenizes an expression string and parses it in one step.
 *
 * The input is run through an ExpressionTokenizer and the resulting tokens
 * are handed to a fresh parser with no positional parameters bound.
 *
 * @param input The expression string to parse
 * @return The root of the expression tree
 * @throws ExpressionError if parsing fails
 */
public static Expression parse(string input) throws ExpressionError {
    var token_stream = new ExpressionTokenizer(input).tokenize_all();
    return new ExpressionParser(token_stream).parse_expression();
}
/**
 * Tokenizes and parses an expression string that may contain positional
 * parameter placeholders.
 *
 * Placeholders are written $0, $1, $2, ... and are resolved against the
 * supplied values while parsing.
 *
 * @param input The expression string to parse
 * @param params Enumerable of Element values for $0, $1, $2, etc.
 * @return The root of the expression tree
 * @throws ExpressionError if parsing fails or parameter index is out of range
 */
public static Expression parse_with_params(string input, Enumerable<Element> params) throws ExpressionError {
    var token_stream = new ExpressionTokenizer(input).tokenize_all();
    var instance = new ExpressionParser(token_stream);

    // Placeholders are looked up by index, so take an array snapshot.
    instance._params = params.to_array();

    return instance.parse_expression();
}
/**
 * Parses the whole token stream into a single expression tree.
 *
 * After the top-level expression has been read, any token still left in
 * the stream is reported as a syntax error.
 *
 * @return The root of the expression tree
 * @throws ExpressionError if parsing fails
 */
public Expression parse_expression() throws ExpressionError {
    var root = parse_ternary();

    if (is_at_end()) {
        return root;
    }

    // Trailing input: the expression ended but tokens remain.
    var token = peek();
    throw new ExpressionError.INVALID_SYNTAX(
        @"Unexpected token '$(token.value)' at position $(token.position)"
    );
}
- // ==================== Precedence Levels ====================
// Ternary: condition ? true_expr : false_expr
// Both branches recurse into parse_ternary, so nested ternaries group to the right.
private Expression parse_ternary() throws ExpressionError {
    var condition = parse_or();
    if (!match(TokenType.QUESTION)) {
        // No '?': this is just the lower-level expression.
        return condition;
    }

    var when_true = parse_ternary();
    expect(TokenType.COLON, "Expected ':' in ternary expression");
    var when_false = parse_ternary();
    return new TernaryExpression(condition, when_true, when_false);
}
// Or: ||
// Operands are folded left to right: a || b || c becomes ((a || b) || c).
private Expression parse_or() throws ExpressionError {
    var result = parse_and();
    while (check(TokenType.OR)) {
        advance(); // consume ||
        var rhs = parse_and();
        result = new BinaryExpression(result, rhs, BinaryOperator.OR);
    }
    return result;
}
// And: &&
// Operands are folded left to right: a && b && c becomes ((a && b) && c).
private Expression parse_and() throws ExpressionError {
    var result = parse_equality();
    for (;;) {
        if (!match(TokenType.AND)) {
            break;
        }
        var rhs = parse_equality();
        result = new BinaryExpression(result, rhs, BinaryOperator.AND);
    }
    return result;
}
// Equality: ==, !=
// Left-associative chain of comparison-level operands.
private Expression parse_equality() throws ExpressionError {
    var result = parse_comparison();
    while (check(TokenType.EQUALS) || check(TokenType.NOT_EQUALS)) {
        // advance() returns the operator token that was just consumed.
        bool is_equal = advance().token_type == TokenType.EQUALS;
        var rhs = parse_comparison();
        if (is_equal) {
            result = new BinaryExpression(result, rhs, BinaryOperator.EQUAL);
        } else {
            result = new BinaryExpression(result, rhs, BinaryOperator.NOT_EQUAL);
        }
    }
    return result;
}
// Comparison: <, >, <=, >=
// Left-associative chain of additive-level operands.
private Expression parse_comparison() throws ExpressionError {
    var result = parse_additive();
    while (true) {
        BinaryOperator op;
        // Map the current token to its operator; anything else ends the chain.
        switch (peek().token_type) {
            case TokenType.LESS_THAN:
                op = BinaryOperator.LESS_THAN;
                break;
            case TokenType.GREATER_THAN:
                op = BinaryOperator.GREATER_THAN;
                break;
            case TokenType.LESS_EQUALS:
                op = BinaryOperator.LESS_EQUAL;
                break;
            case TokenType.GREATER_EQUALS:
                op = BinaryOperator.GREATER_EQUAL;
                break;
            default:
                return result;
        }
        advance(); // consume the operator
        var rhs = parse_additive();
        result = new BinaryExpression(result, rhs, op);
    }
}
// Additive: +, -
// Left-associative chain of multiplicative-level operands.
private Expression parse_additive() throws ExpressionError {
    var result = parse_multiplicative();
    while (check(TokenType.PLUS) || check(TokenType.MINUS)) {
        // advance() returns the operator token that was just consumed.
        bool is_sum = advance().token_type == TokenType.PLUS;
        var rhs = parse_multiplicative();
        if (is_sum) {
            result = new BinaryExpression(result, rhs, BinaryOperator.ADD);
        } else {
            result = new BinaryExpression(result, rhs, BinaryOperator.SUBTRACT);
        }
    }
    return result;
}
// Multiplicative: *, /, %
// Left-associative chain of unary-level operands.
private Expression parse_multiplicative() throws ExpressionError {
    var result = parse_unary();
    while (true) {
        BinaryOperator op;
        // Map the current token to its operator; anything else ends the chain.
        switch (peek().token_type) {
            case TokenType.STAR:
                op = BinaryOperator.MULTIPLY;
                break;
            case TokenType.SLASH:
                op = BinaryOperator.DIVIDE;
                break;
            case TokenType.PERCENT:
                op = BinaryOperator.MODULO;
                break;
            default:
                return result;
        }
        advance(); // consume the operator
        var rhs = parse_unary();
        result = new BinaryExpression(result, rhs, op);
    }
}
// Unary: !, -
// Prefix operators may be stacked (e.g. !!x, --x) because the operand is
// itself parsed at the unary level.
private Expression parse_unary() throws ExpressionError {
    UnaryOperator op;
    if (match(TokenType.NOT)) {
        op = UnaryOperator.NOT;
    } else if (match(TokenType.MINUS)) {
        op = UnaryOperator.NEGATE;
    } else {
        // No prefix operator: hand over to the postfix level.
        return parse_postfix();
    }

    var operand = parse_unary();
    return new UnaryExpression(op, operand);
}
// Postfix: .property, .method(args)
// Each '.' wraps the expression built so far, so a.b.c(d) nests left to right.
private Expression parse_postfix() throws ExpressionError {
    var target = parse_primary();

    while (match(TokenType.DOT)) {
        var member = expect(TokenType.IDENTIFIER, "Expected property or method name after '.'");

        if (!match(TokenType.LPAREN)) {
            // No argument list follows: plain property access.
            target = new PropertyExpression(target, member.value);
            continue;
        }

        // Method call on the expression built so far.
        var call_args = parse_arguments();
        expect(TokenType.RPAREN, "Expected ')' after function arguments");
        target = new FunctionCallExpression(target, member.value, call_args);
    }

    return target;
}
// Primary: literals, variables, parentheses, lambdas, lot literals,
// parameter placeholders ($N) and global function calls.
//
// Alternatives are tried in a fixed order and the first whose leading token
// matches wins. Throws ExpressionError.INVALID_SYNTAX when no alternative
// matches, when "()" is not followed by "=>", when a parenthesized lambda
// does not declare exactly one parameter (so "() => expr" is rejected too),
// or when a $N placeholder has no corresponding entry in _params.
private Expression parse_primary() throws ExpressionError {
    // Lot literal: [expr1, expr2, ...]
    if (match(TokenType.LBRACKET)) {
        return parse_lot_literal();
    }

    // Parenthesized expression or lambda
    if (match(TokenType.LPAREN)) {
        // "(x) => expr" and "(expr)" both start with '(' so scan ahead
        // speculatively for "identifiers ) =>" and rewind to this position
        // when the scan does not end in an arrow.
        var saved_position = _position;

        var param_names = new Series<string>();
        bool is_lambda = false;

        if (check(TokenType.RPAREN)) {
            // "()" is only accepted syntactically as the start of a lambda.
            advance();
            if (match(TokenType.ARROW)) {
                is_lambda = true;
            } else {
                throw new ExpressionError.INVALID_SYNTAX(
                    "Empty parentheses are not valid"
                );
            }
        } else {
            // Try to read a comma-separated identifier list.
            while (!check(TokenType.RPAREN) && !is_at_end()) {
                if (check(TokenType.IDENTIFIER)) {
                    param_names.add(advance().value);
                    if (!check(TokenType.RPAREN)) {
                        if (!match(TokenType.COMMA)) {
                            break; // Not a lambda parameter list
                        }
                    }
                } else {
                    break; // Not a lambda parameter list
                }
            }

            if (check(TokenType.RPAREN)) {
                advance(); // consume )
                if (match(TokenType.ARROW)) {
                    is_lambda = true;
                }
            }
        }

        if (is_lambda) {
            // Lambda with parenthesized parameters; only single-parameter
            // lambdas can be represented by LambdaExpression here.
            if (param_names.length != 1) {
                throw new ExpressionError.INVALID_SYNTAX(
                    "Lambda expressions require exactly one parameter"
                );
            }
            var body = parse_ternary();
            return new LambdaExpression(param_names.first(), body);
        } else {
            // Rewind to just after '(' and parse as a grouped expression.
            _position = saved_position;
            var expr = parse_ternary();
            expect(TokenType.RPAREN, "Expected ')' after expression");
            return new BracketedExpression(expr);
        }
    }

    // Lambda: identifier => expr
    if (check(TokenType.IDENTIFIER)) {
        // One-token lookahead for "=>"; rewind if it is not there so the
        // identifier is handled as a variable or function call below.
        var saved_position = _position;
        var name_token = advance();

        if (match(TokenType.ARROW)) {
            var body = parse_ternary();
            return new LambdaExpression(name_token.value, body);
        } else {
            _position = saved_position;
        }
    }

    // Literals
    if (match(TokenType.INTEGER)) {
        var token = previous();
        int64 value = int64.parse(token.value);
        return new LiteralExpression(new NativeElement<int64?>(value));
    }
    if (match(TokenType.LONG_INTEGER)) {
        var token = previous();
        int64 value = int64.parse(token.value);
        return new LiteralExpression(new NativeElement<int64?>(value));
    }
    if (match(TokenType.UNSIGNED_LONG)) {
        var token = previous();
        uint64 value = uint64.parse(token.value);
        return new LiteralExpression(new NativeElement<uint64?>(value));
    }
    if (match(TokenType.FLOAT)) {
        var token = previous();
        double value = double.parse(token.value);
        return new LiteralExpression(new NativeElement<double?>(value));
    }
    if (match(TokenType.FLOAT_LITERAL)) {
        var token = previous();
        float value = (float)double.parse(token.value);
        return new LiteralExpression(new NativeElement<float?>(value));
    }
    if (match(TokenType.STRING)) {
        var token = previous();
        return new LiteralExpression(new NativeElement<string>(token.value));
    }
    if (match(TokenType.CHAR_LITERAL)) {
        var token = previous();
        // The value is stored as a single character string; an empty
        // value falls back to the NUL character.
        char value = token.value.length > 0 ? token.value[0] : '\0';
        return new LiteralExpression(new NativeElement<char>(value));
    }
    if (match(TokenType.TRUE)) {
        return new LiteralExpression(new NativeElement<bool>(true));
    }
    if (match(TokenType.FALSE)) {
        return new LiteralExpression(new NativeElement<bool>(false));
    }
    if (match(TokenType.NULL_LITERAL)) {
        return new LiteralExpression(new NullElement());
    }

    // Parameter placeholder: $0, $1, etc.
    if (match(TokenType.PARAMETER)) {
        var token = previous();
        int index = int.parse(token.value);

        if (index < 0 || index >= _params.length) {
            // Built with printf on purpose: inside an @"" template "$$" is
            // the escape for a literal dollar sign, so "$$index" would emit
            // the text "$index" rather than the offending number.
            throw new ExpressionError.INVALID_SYNTAX(
                "Parameter index $%d out of range".printf(index)
            );
        }

        return new ParameterExpression(index, _params[index]);
    }

    // Variable or standalone function call
    if (match(TokenType.IDENTIFIER)) {
        var token = previous();

        // An identifier directly followed by '(' is a global call: func(args)
        if (match(TokenType.LPAREN)) {
            var args = parse_arguments();
            expect(TokenType.RPAREN, "Expected ')' after function arguments");

            return new GlobalFunctionCallExpression(token.value, args);
        }

        return new VariableExpression(token.value);
    }

    // Nothing matched: the current token cannot start an expression.
    var current = peek();
    throw new ExpressionError.INVALID_SYNTAX(
        @"Unexpected token '$(current.value)' at position $(current.position)"
    );
}
// Parse function call arguments: zero or more comma-separated expressions.
// The caller has already consumed '(' and is responsible for consuming ')'.
private Series<Expression> parse_arguments() throws ExpressionError {
    var collected = new Series<Expression>();

    // An immediate ')' means the list is empty.
    if (!check(TokenType.RPAREN)) {
        do {
            collected.add(parse_ternary());
        } while (match(TokenType.COMMA));
    }

    return collected;
}
// Parse lot literal: [expr1, expr2, ...]
// The caller has already consumed '['; this method consumes the closing ']'.
private Expression parse_lot_literal() throws ExpressionError {
    // Empty lot: []
    if (match(TokenType.RBRACKET)) {
        return new LotLiteralExpression(new Expression[0]);
    }

    // One element, then one more after every comma.
    var items = new Series<Expression>();
    do {
        items.add(parse_ternary());
    } while (match(TokenType.COMMA));

    expect(TokenType.RBRACKET, "Expected ']' after lot literal elements");
    return new LotLiteralExpression(items.to_array());
}
- // ==================== Helper Methods ====================
// True when the current token is EOF; peek() also reports EOF once the
// cursor has moved past the last stored token.
private bool is_at_end() {
    var current = peek();
    return current.token_type == TokenType.EOF;
}
// Returns the current token without consuming it. When the cursor is past
// the end of the array, a synthetic EOF token is returned instead.
private Token peek() {
    if (_position < _tokens.length) {
        return _tokens[_position];
    }
    return new Token(TokenType.EOF, "", _position);
}
// Returns the token just before the cursor. When there is no such token
// (cursor at the start or out of range), a synthetic EOF token is returned.
private Token previous() {
    bool has_previous = _position > 0 && _position <= _tokens.length;
    if (has_previous) {
        return _tokens[_position - 1];
    }
    return new Token(TokenType.EOF, "", _position - 1);
}
// Consumes the current token, unless already at the end, and returns
// whatever token then sits just before the cursor.
private Token advance() {
    if (is_at_end()) {
        // Never step past EOF.
        return previous();
    }
    _position += 1;
    return previous();
}
// True when the current token has the given type. Always false at the end
// of input, so checking for EOF itself never succeeds.
private bool check(TokenType type) {
    return !is_at_end() && peek().token_type == type;
}
// Consumes the current token if it has the given type and reports whether
// it did so; the cursor is left untouched on a mismatch.
private bool match(TokenType type) {
    if (!check(type)) {
        return false;
    }
    advance();
    return true;
}
// Consumes and returns the current token when it has the given type.
// Otherwise throws ExpressionError.INVALID_SYNTAX carrying the supplied
// message plus the offending token's value and position.
private Token expect(TokenType type, string message) throws ExpressionError {
    if (!check(type)) {
        var current = peek();
        throw new ExpressionError.INVALID_SYNTAX(
            @"$message, got '$(current.value)' at position $(current.position)"
        );
    }
    return advance();
}
- }
- }
|