ExpressionParser.vala 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535
  1. using Invercargill.DataStructures;
  2. namespace Invercargill.Expressions {
  3. /**
  4. * Parser for expression strings.
  5. *
  6. * Converts expression strings into Expression trees using recursive descent parsing.
  7. * Supports:
  8. * - Literals: integers, floats, strings (single/double quotes), booleans, null
  9. * - Variables: identifiers
  10. * - Operators: +, -, *, /, %, ==, !=, <, >, <=, >=, &&, ||, !
  11. * - Ternary: condition ? true_expr : false_expr
  12. * - Property access: obj.property
  13. * - Function calls: obj.method(args...)
  14. * - Lambdas: x => expression
  15. * - Parentheses: (expression)
  16. *
  17. * Operator precedence (lowest to highest):
  18. * 1. Ternary: ? :
  19. * 2. Or: ||
  20. * 3. And: &&
  21. * 4. Equality: ==, !=
  22. * 5. Comparison: <, >, <=, >=
  23. * 6. Additive: +, -
  24. * 7. Multiplicative: *, /, %
  25. * 8. Unary: !, -
  26. * 9. Postfix: .property, .method(args), (args)
  27. * 10. Primary: literals, variables, parentheses
  28. */
  29. public class ExpressionParser : Object {
  30. private Token[] _tokens;
  31. private int _position;
  32. private Element[] _params;
  33. /**
  34. * Creates a new parser for the given token stream.
  35. *
  36. * @param tokens The tokens to parse
  37. */
  38. public ExpressionParser(Series<Token> tokens) {
  39. _tokens = tokens.to_array();
  40. _position = 0;
  41. _params = new Element[0];
  42. }
  43. /**
  44. * Parses an expression string and returns the expression tree.
  45. *
  46. * @param input The expression string to parse
  47. * @return The root of the expression tree
  48. * @throws ExpressionError if parsing fails
  49. */
  50. public static Expression parse(string input) throws ExpressionError {
  51. var tokenizer = new ExpressionTokenizer(input);
  52. var tokens = tokenizer.tokenize_all();
  53. var parser = new ExpressionParser(tokens);
  54. return parser.parse_expression();
  55. }
  56. /**
  57. * Parses an expression string with positional parameters.
  58. *
  59. * Parameters are referenced using $0, $1, $2, etc. syntax.
  60. *
  61. * @param input The expression string to parse
  62. * @param params Enumerable of Element values for $0, $1, $2, etc.
  63. * @return The root of the expression tree
  64. * @throws ExpressionError if parsing fails or parameter index is out of range
  65. */
  66. public static Expression parse_with_params(string input, Enumerable<Element> params) throws ExpressionError {
  67. var tokenizer = new ExpressionTokenizer(input);
  68. var tokens = tokenizer.tokenize_all();
  69. var parser = new ExpressionParser(tokens);
  70. // Convert enumerable to array for indexed access
  71. parser._params = params.to_array();
  72. return parser.parse_expression();
  73. }
  74. /**
  75. * Parses the token stream and returns the expression tree.
  76. *
  77. * @return The root of the expression tree
  78. * @throws ExpressionError if parsing fails
  79. */
  80. public Expression parse_expression() throws ExpressionError {
  81. var expr = parse_ternary();
  82. // Ensure we've consumed all tokens
  83. if (!is_at_end()) {
  84. var token = peek();
  85. throw new ExpressionError.INVALID_SYNTAX(
  86. @"Unexpected token '$(token.value)' at position $(token.position)"
  87. );
  88. }
  89. return expr;
  90. }
  91. // ==================== Precedence Levels ====================
  92. // Ternary: ? :
  93. private Expression parse_ternary() throws ExpressionError {
  94. var condition = parse_or();
  95. if (match(TokenType.QUESTION)) {
  96. var true_expr = parse_ternary();
  97. expect(TokenType.COLON, "Expected ':' in ternary expression");
  98. var false_expr = parse_ternary();
  99. return new TernaryExpression(condition, true_expr, false_expr);
  100. }
  101. return condition;
  102. }
  103. // Or: ||
  104. private Expression parse_or() throws ExpressionError {
  105. var left = parse_and();
  106. while (match(TokenType.OR)) {
  107. var right = parse_and();
  108. left = new BinaryExpression(left, right, BinaryOperator.OR);
  109. }
  110. return left;
  111. }
  112. // And: &&
  113. private Expression parse_and() throws ExpressionError {
  114. var left = parse_equality();
  115. while (match(TokenType.AND)) {
  116. var right = parse_equality();
  117. left = new BinaryExpression(left, right, BinaryOperator.AND);
  118. }
  119. return left;
  120. }
  121. // Equality: ==, !=
  122. private Expression parse_equality() throws ExpressionError {
  123. var left = parse_comparison();
  124. while (true) {
  125. if (match(TokenType.EQUALS)) {
  126. var right = parse_comparison();
  127. left = new BinaryExpression(left, right, BinaryOperator.EQUAL);
  128. } else if (match(TokenType.NOT_EQUALS)) {
  129. var right = parse_comparison();
  130. left = new BinaryExpression(left, right, BinaryOperator.NOT_EQUAL);
  131. } else {
  132. break;
  133. }
  134. }
  135. return left;
  136. }
  137. // Comparison: <, >, <=, >=
  138. private Expression parse_comparison() throws ExpressionError {
  139. var left = parse_additive();
  140. while (true) {
  141. if (match(TokenType.LESS_THAN)) {
  142. var right = parse_additive();
  143. left = new BinaryExpression(left, right, BinaryOperator.LESS_THAN);
  144. } else if (match(TokenType.GREATER_THAN)) {
  145. var right = parse_additive();
  146. left = new BinaryExpression(left, right, BinaryOperator.GREATER_THAN);
  147. } else if (match(TokenType.LESS_EQUALS)) {
  148. var right = parse_additive();
  149. left = new BinaryExpression(left, right, BinaryOperator.LESS_EQUAL);
  150. } else if (match(TokenType.GREATER_EQUALS)) {
  151. var right = parse_additive();
  152. left = new BinaryExpression(left, right, BinaryOperator.GREATER_EQUAL);
  153. } else {
  154. break;
  155. }
  156. }
  157. return left;
  158. }
  159. // Additive: +, -
  160. private Expression parse_additive() throws ExpressionError {
  161. var left = parse_multiplicative();
  162. while (true) {
  163. if (match(TokenType.PLUS)) {
  164. var right = parse_multiplicative();
  165. left = new BinaryExpression(left, right, BinaryOperator.ADD);
  166. } else if (match(TokenType.MINUS)) {
  167. var right = parse_multiplicative();
  168. left = new BinaryExpression(left, right, BinaryOperator.SUBTRACT);
  169. } else {
  170. break;
  171. }
  172. }
  173. return left;
  174. }
  175. // Multiplicative: *, /, %
  176. private Expression parse_multiplicative() throws ExpressionError {
  177. var left = parse_unary();
  178. while (true) {
  179. if (match(TokenType.STAR)) {
  180. var right = parse_unary();
  181. left = new BinaryExpression(left, right, BinaryOperator.MULTIPLY);
  182. } else if (match(TokenType.SLASH)) {
  183. var right = parse_unary();
  184. left = new BinaryExpression(left, right, BinaryOperator.DIVIDE);
  185. } else if (match(TokenType.PERCENT)) {
  186. var right = parse_unary();
  187. left = new BinaryExpression(left, right, BinaryOperator.MODULO);
  188. } else {
  189. break;
  190. }
  191. }
  192. return left;
  193. }
  194. // Unary: !, -
  195. private Expression parse_unary() throws ExpressionError {
  196. if (match(TokenType.NOT)) {
  197. var operand = parse_unary();
  198. return new UnaryExpression(UnaryOperator.NOT, operand);
  199. }
  200. if (match(TokenType.MINUS)) {
  201. var operand = parse_unary();
  202. return new UnaryExpression(UnaryOperator.NEGATE, operand);
  203. }
  204. return parse_postfix();
  205. }
  206. // Postfix: .property, .method(args)
  207. private Expression parse_postfix() throws ExpressionError {
  208. var expr = parse_primary();
  209. while (true) {
  210. if (match(TokenType.DOT)) {
  211. var name_token = expect(TokenType.IDENTIFIER, "Expected property or method name after '.'");
  212. if (match(TokenType.LPAREN)) {
  213. // Function call
  214. var args = parse_arguments();
  215. expect(TokenType.RPAREN, "Expected ')' after function arguments");
  216. expr = new FunctionCallExpression(expr, name_token.value, args);
  217. } else {
  218. // Property access
  219. expr = new PropertyExpression(expr, name_token.value);
  220. }
  221. } else {
  222. break;
  223. }
  224. }
  225. return expr;
  226. }
  227. // Primary: literals, variables, parentheses, lambdas, lot literals
  228. private Expression parse_primary() throws ExpressionError {
  229. // Lot literal: [expr1, expr2, ...]
  230. if (match(TokenType.LBRACKET)) {
  231. return parse_lot_literal();
  232. }
  233. // Parenthesized expression or lambda
  234. if (match(TokenType.LPAREN)) {
  235. // Check if this is a lambda: (x) => expr or () => expr
  236. // We need to look ahead for => after the closing paren
  237. var saved_position = _position;
  238. // Try to parse as just identifiers followed by )
  239. var param_names = new Series<string>();
  240. bool is_lambda = false;
  241. // Empty parens: () => expr
  242. if (check(TokenType.RPAREN)) {
  243. advance();
  244. if (match(TokenType.ARROW)) {
  245. is_lambda = true;
  246. } else {
  247. // Just empty parens - error
  248. throw new ExpressionError.INVALID_SYNTAX(
  249. "Empty parentheses are not valid"
  250. );
  251. }
  252. } else {
  253. // Try to parse as parameter list
  254. while (!check(TokenType.RPAREN) && !is_at_end()) {
  255. if (check(TokenType.IDENTIFIER)) {
  256. param_names.add(advance().value);
  257. if (!check(TokenType.RPAREN)) {
  258. if (!match(TokenType.COMMA)) {
  259. break; // Not a lambda parameter list
  260. }
  261. }
  262. } else {
  263. break; // Not a lambda parameter list
  264. }
  265. }
  266. if (check(TokenType.RPAREN)) {
  267. advance(); // consume )
  268. if (match(TokenType.ARROW)) {
  269. is_lambda = true;
  270. }
  271. }
  272. }
  273. if (is_lambda) {
  274. // It's a lambda with parenthesized parameters
  275. if (param_names.length != 1) {
  276. throw new ExpressionError.INVALID_SYNTAX(
  277. "Lambda expressions require exactly one parameter"
  278. );
  279. }
  280. var body = parse_ternary();
  281. return new LambdaExpression(param_names.first(), body);
  282. } else {
  283. // Reset and parse as grouped expression
  284. _position = saved_position;
  285. var expr = parse_ternary();
  286. expect(TokenType.RPAREN, "Expected ')' after expression");
  287. return new BracketedExpression(expr);
  288. }
  289. }
  290. // Lambda: identifier => expr
  291. if (check(TokenType.IDENTIFIER)) {
  292. // Look ahead for =>
  293. var saved_position = _position;
  294. var name_token = advance();
  295. if (match(TokenType.ARROW)) {
  296. // It's a lambda: x => expr
  297. var body = parse_ternary();
  298. return new LambdaExpression(name_token.value, body);
  299. } else {
  300. // Reset and continue as variable
  301. _position = saved_position;
  302. }
  303. }
  304. // Literals and variables
  305. if (match(TokenType.INTEGER)) {
  306. var token = previous();
  307. int64 value = int64.parse(token.value);
  308. return new LiteralExpression(new NativeElement<int64?>(value));
  309. }
  310. if (match(TokenType.LONG_INTEGER)) {
  311. var token = previous();
  312. int64 value = int64.parse(token.value);
  313. return new LiteralExpression(new NativeElement<int64?>(value));
  314. }
  315. if (match(TokenType.UNSIGNED_LONG)) {
  316. var token = previous();
  317. uint64 value = uint64.parse(token.value);
  318. return new LiteralExpression(new NativeElement<uint64?>(value));
  319. }
  320. if (match(TokenType.FLOAT)) {
  321. var token = previous();
  322. double value = double.parse(token.value);
  323. return new LiteralExpression(new NativeElement<double?>(value));
  324. }
  325. if (match(TokenType.FLOAT_LITERAL)) {
  326. var token = previous();
  327. float value = (float)double.parse(token.value);
  328. return new LiteralExpression(new NativeElement<float?>(value));
  329. }
  330. if (match(TokenType.STRING)) {
  331. var token = previous();
  332. return new LiteralExpression(new NativeElement<string>(token.value));
  333. }
  334. if (match(TokenType.CHAR_LITERAL)) {
  335. var token = previous();
  336. // The value is stored as a single character string
  337. char value = token.value.length > 0 ? token.value[0] : '\0';
  338. return new LiteralExpression(new NativeElement<char>(value));
  339. }
  340. if (match(TokenType.TRUE)) {
  341. return new LiteralExpression(new NativeElement<bool>(true));
  342. }
  343. if (match(TokenType.FALSE)) {
  344. return new LiteralExpression(new NativeElement<bool>(false));
  345. }
  346. if (match(TokenType.NULL_LITERAL)) {
  347. return new LiteralExpression(new NullElement());
  348. }
  349. // Parameter placeholder: $0, $1, etc.
  350. if (match(TokenType.PARAMETER)) {
  351. var token = previous();
  352. int index = int.parse(token.value);
  353. if (index < 0 || index >= _params.length) {
  354. throw new ExpressionError.INVALID_SYNTAX(
  355. @"Parameter index $$index out of range"
  356. );
  357. }
  358. return new ParameterExpression(index, _params[index]);
  359. }
  360. // Variable or standalone function call
  361. if (match(TokenType.IDENTIFIER)) {
  362. var token = previous();
  363. // Check if this is a standalone function call: func(args)
  364. if (match(TokenType.LPAREN)) {
  365. // It's a global function call
  366. var args = parse_arguments();
  367. expect(TokenType.RPAREN, "Expected ')' after function arguments");
  368. return new GlobalFunctionCallExpression(token.value, args);
  369. }
  370. return new VariableExpression(token.value);
  371. }
  372. // Error: unexpected token
  373. var current = peek();
  374. throw new ExpressionError.INVALID_SYNTAX(
  375. @"Unexpected token '$(current.value)' at position $(current.position)"
  376. );
  377. }
  378. // Parse function call arguments
  379. private Series<Expression> parse_arguments() throws ExpressionError {
  380. var args = new Series<Expression>();
  381. if (check(TokenType.RPAREN)) {
  382. return args; // Empty argument list
  383. }
  384. args.add(parse_ternary());
  385. while (match(TokenType.COMMA)) {
  386. args.add(parse_ternary());
  387. }
  388. return args;
  389. }
  390. // Parse lot literal: [expr1, expr2, ...]
  391. private Expression parse_lot_literal() throws ExpressionError {
  392. var elements = new Series<Expression>();
  393. // Empty lot: []
  394. if (check(TokenType.RBRACKET)) {
  395. advance();
  396. return new LotLiteralExpression(new Expression[0]);
  397. }
  398. // Parse first element
  399. elements.add(parse_ternary());
  400. // Parse remaining elements
  401. while (match(TokenType.COMMA)) {
  402. elements.add(parse_ternary());
  403. }
  404. expect(TokenType.RBRACKET, "Expected ']' after lot literal elements");
  405. return new LotLiteralExpression(elements.to_array());
  406. }
  407. // ==================== Helper Methods ====================
  408. private bool is_at_end() {
  409. return peek().token_type == TokenType.EOF;
  410. }
  411. private Token peek() {
  412. if (_position >= _tokens.length) {
  413. return new Token(TokenType.EOF, "", _position);
  414. }
  415. return _tokens[_position];
  416. }
  417. private Token previous() {
  418. if (_position <= 0 || _position > _tokens.length) {
  419. return new Token(TokenType.EOF, "", _position - 1);
  420. }
  421. return _tokens[_position - 1];
  422. }
  423. private Token advance() {
  424. if (!is_at_end()) {
  425. _position++;
  426. }
  427. return previous();
  428. }
  429. private bool check(TokenType type) {
  430. if (is_at_end()) return false;
  431. return peek().token_type == type;
  432. }
  433. private bool match(TokenType type) {
  434. if (check(type)) {
  435. advance();
  436. return true;
  437. }
  438. return false;
  439. }
  440. private Token expect(TokenType type, string message) throws ExpressionError {
  441. if (check(type)) {
  442. return advance();
  443. }
  444. var current = peek();
  445. throw new ExpressionError.INVALID_SYNTAX(
  446. @"$message, got '$(current.value)' at position $(current.position)"
  447. );
  448. }
  449. }
  450. }