|
@@ -8,8 +8,12 @@ namespace Invercargill.Expressions {
|
|
|
public enum TokenType {
|
|
public enum TokenType {
|
|
|
// Literals
|
|
// Literals
|
|
|
INTEGER,
|
|
INTEGER,
|
|
|
|
|
+ LONG_INTEGER, // integer with L suffix
|
|
|
|
|
+ UNSIGNED_LONG, // integer with UL suffix
|
|
|
FLOAT,
|
|
FLOAT,
|
|
|
|
|
+ FLOAT_LITERAL, // float with f suffix
|
|
|
STRING,
|
|
STRING,
|
|
|
|
|
+ CHAR_LITERAL, // single character in single quotes
|
|
|
TRUE,
|
|
TRUE,
|
|
|
FALSE,
|
|
FALSE,
|
|
|
NULL_LITERAL,
|
|
NULL_LITERAL,
|
|
@@ -54,8 +58,12 @@ namespace Invercargill.Expressions {
|
|
|
public string to_string() {
|
|
public string to_string() {
|
|
|
switch (this) {
|
|
switch (this) {
|
|
|
case INTEGER: return "INTEGER";
|
|
case INTEGER: return "INTEGER";
|
|
|
|
|
+ case LONG_INTEGER: return "LONG_INTEGER";
|
|
|
|
|
+ case UNSIGNED_LONG: return "UNSIGNED_LONG";
|
|
|
case FLOAT: return "FLOAT";
|
|
case FLOAT: return "FLOAT";
|
|
|
|
|
+ case FLOAT_LITERAL: return "FLOAT_LITERAL";
|
|
|
case STRING: return "STRING";
|
|
case STRING: return "STRING";
|
|
|
|
|
+ case CHAR_LITERAL: return "CHAR_LITERAL";
|
|
|
case TRUE: return "TRUE";
|
|
case TRUE: return "TRUE";
|
|
|
case FALSE: return "FALSE";
|
|
case FALSE: return "FALSE";
|
|
|
case NULL_LITERAL: return "NULL";
|
|
case NULL_LITERAL: return "NULL";
|
|
@@ -267,10 +275,13 @@ namespace Invercargill.Expressions {
|
|
|
return read_parameter();
|
|
return read_parameter();
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // String literals
|
|
|
|
|
- if (c == '"' || c == '\'') {
|
|
|
|
|
|
|
+ // String literals (double quotes) or char literals (single quotes)
|
|
|
|
|
+ if (c == '"') {
|
|
|
return read_string(c);
|
|
return read_string(c);
|
|
|
}
|
|
}
|
|
|
|
|
+ if (c == '\'') {
|
|
|
|
|
+ return read_char_literal();
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
// Numbers
|
|
// Numbers
|
|
|
if (c.isdigit()) {
|
|
if (c.isdigit()) {
|
|
@@ -322,6 +333,25 @@ namespace Invercargill.Expressions {
|
|
|
case '\\': sb.append("\\"); break;
|
|
case '\\': sb.append("\\"); break;
|
|
|
case '"': sb.append("\""); break;
|
|
case '"': sb.append("\""); break;
|
|
|
case '\'': sb.append("'"); break;
|
|
case '\'': sb.append("'"); break;
|
|
|
|
|
+ case '%': sb.append("%"); break;
|
|
|
|
|
+ case 'x':
|
|
|
|
|
+ // Hex escape \xNN
|
|
|
|
|
+ _position++;
|
|
|
|
|
+ if (_position + 1 >= _length) {
|
|
|
|
|
+ throw new ExpressionError.INVALID_SYNTAX(
|
|
|
|
|
+ @"Invalid hex escape at position $(_position)"
|
|
|
|
|
+ );
|
|
|
|
|
+ }
|
|
|
|
|
+ string hex = _input.substring(_position, 2);
|
|
|
|
|
+ if (!hex[0].isxdigit() || !hex[1].isxdigit()) {
|
|
|
|
|
+ throw new ExpressionError.INVALID_SYNTAX(
|
|
|
|
|
+ @"Invalid hex escape '\\x$hex' at position $(_position)"
|
|
|
|
|
+ );
|
|
|
|
|
+ }
|
|
|
|
|
+ int char_val = parse_hex(hex);
|
|
|
|
|
+ sb.append_c((char)char_val);
|
|
|
|
|
+ _position += 1; // Will be incremented again below
|
|
|
|
|
+ break;
|
|
|
default:
|
|
default:
|
|
|
throw new ExpressionError.INVALID_SYNTAX(
|
|
throw new ExpressionError.INVALID_SYNTAX(
|
|
|
@"Unknown escape sequence '\\$escaped' at position $(_position)"
|
|
@"Unknown escape sequence '\\$escaped' at position $(_position)"
|
|
@@ -339,7 +369,74 @@ namespace Invercargill.Expressions {
|
|
|
);
|
|
);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- private Token read_number() {
|
|
|
|
|
|
|
/**
 * Reads one single-quoted character literal and returns it as a token.
 *
 * Expected to be called with _position on the opening quote. Exactly one
 * unescaped byte or one escape sequence must follow, then a closing quote.
 * Supported escapes: \n, \t, \r, \\, \', \" and \xNN (two hex digits).
 *
 * NOTE(review): the value is a single byte, so a multi-byte UTF-8 character
 * between the quotes fails the closing-quote check and is reported as
 * unterminated. \x00 and \x80-\xFF are stored through char.to_string();
 * confirm that consumers of CHAR_LITERAL tokens cope with those values.
 *
 * @return a CHAR_LITERAL token whose value is the character converted with
 *         to_string(), positioned at the opening quote
 * @throws ExpressionError INVALID_SYNTAX for an empty or unterminated
 *         literal, an unknown escape, or a malformed hex escape
 */
private Token read_char_literal() throws ExpressionError {
    int start_pos = _position;
    _position++; // Skip opening quote

    if (_position >= _length) {
        throw new ExpressionError.INVALID_SYNTAX(
            @"Unterminated character literal at position $start_pos"
        );
    }

    char c = _input[_position];
    char char_value;

    if (c == '\'') {
        // Two quotes with nothing in between. Without this guard the closing
        // quote would be consumed as the value and the error reported later
        // would wrongly claim the literal is unterminated.
        throw new ExpressionError.INVALID_SYNTAX(
            @"Empty character literal at position $start_pos"
        );
    }

    if (c == '\\') {
        // Escape sequence: step onto the character that follows the backslash
        _position++;
        if (_position >= _length) {
            throw new ExpressionError.INVALID_SYNTAX(
                @"Unterminated character literal at position $start_pos"
            );
        }
        char escaped = _input[_position];
        switch (escaped) {
            case 'n': char_value = '\n'; break;
            case 't': char_value = '\t'; break;
            case 'r': char_value = '\r'; break;
            case '\\': char_value = '\\'; break;
            case '\'': char_value = '\''; break;
            // Accepted for parity with the escapes allowed in string literals
            case '"': char_value = '"'; break;
            case 'x':
                // Hex escape \xNN: move onto the first digit and make sure
                // a second digit exists before slicing
                _position++;
                if (_position + 1 >= _length) {
                    throw new ExpressionError.INVALID_SYNTAX(
                        @"Invalid hex escape at position $(_position)"
                    );
                }
                string hex = _input.substring(_position, 2);
                if (!hex[0].isxdigit() || !hex[1].isxdigit()) {
                    throw new ExpressionError.INVALID_SYNTAX(
                        @"Invalid hex escape '\\x$hex' at position $(_position)"
                    );
                }
                char_value = (char)parse_hex(hex);
                // Leave _position on the second digit; the shared increment
                // after the if/else moves past it
                _position++;
                break;
            default:
                throw new ExpressionError.INVALID_SYNTAX(
                    @"Unknown escape sequence '\\$escaped' in character literal at position $(_position)"
                );
        }
    } else {
        char_value = c;
    }

    // Step past the last byte of the character (or escape) just read
    _position++;

    // Expect closing quote
    if (_position >= _length || _input[_position] != '\'') {
        throw new ExpressionError.INVALID_SYNTAX(
            @"Unterminated character literal starting at position $start_pos"
        );
    }
    _position++; // Skip closing quote

    return new Token(TokenType.CHAR_LITERAL, char_value.to_string(), start_pos);
}
|
|
|
|
|
+
|
|
|
|
|
+ private Token read_number() throws ExpressionError {
|
|
|
int start_pos = _position;
|
|
int start_pos = _position;
|
|
|
var sb = new StringBuilder();
|
|
var sb = new StringBuilder();
|
|
|
bool has_decimal = false;
|
|
bool has_decimal = false;
|
|
@@ -367,6 +464,30 @@ namespace Invercargill.Expressions {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
string value = sb.str;
|
|
string value = sb.str;
|
|
|
|
|
+
|
|
|
|
|
+ // Check for type suffixes
|
|
|
|
|
+ if (_position < _length) {
|
|
|
|
|
+ char suffix = _input[_position].tolower();
|
|
|
|
|
+
|
|
|
|
|
+ // Check for 'ul' or 'UL' suffix (unsigned long)
|
|
|
|
|
+ if (suffix == 'u' && _position + 1 < _length && _input[_position + 1].tolower() == 'l') {
|
|
|
|
|
+ _position += 2;
|
|
|
|
|
+ return new Token(TokenType.UNSIGNED_LONG, value, start_pos);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // Check for 'l' suffix (long)
|
|
|
|
|
+ if (suffix == 'l') {
|
|
|
|
|
+ _position++;
|
|
|
|
|
+ return new Token(TokenType.LONG_INTEGER, value, start_pos);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // Check for 'f' suffix (float)
|
|
|
|
|
+ if (suffix == 'f') {
|
|
|
|
|
+ _position++;
|
|
|
|
|
+ return new Token(TokenType.FLOAT_LITERAL, value, start_pos);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
if (has_decimal) {
|
|
if (has_decimal) {
|
|
|
return new Token(TokenType.FLOAT, value, start_pos);
|
|
return new Token(TokenType.FLOAT, value, start_pos);
|
|
|
} else {
|
|
} else {
|
|
@@ -421,6 +542,22 @@ namespace Invercargill.Expressions {
|
|
|
|
|
|
|
|
return new Token(TokenType.PARAMETER, sb.str, start_pos);
|
|
return new Token(TokenType.PARAMETER, sb.str, start_pos);
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
/**
 * Converts a string of hexadecimal digits to its integer value.
 *
 * Digits are read left to right, most significant first; upper- and
 * lower-case letters are both accepted. A character that is not a hex
 * digit contributes zero but still shifts the accumulated value by one
 * hex place.
 *
 * @param hex the digits to convert, without any prefix
 * @return the accumulated value; 0 for an empty string
 */
private static int parse_hex(string hex) {
    int value = 0;
    int index = 0;
    while (index < hex.length) {
        char digit = hex[index];
        int nibble = 0;
        if ('0' <= digit && digit <= '9') {
            nibble = digit - '0';
        } else if ('a' <= digit && digit <= 'f') {
            nibble = digit - 'a' + 10;
        } else if ('A' <= digit && digit <= 'F') {
            nibble = digit - 'A' + 10;
        }
        value = value * 16 + nibble;
        index++;
    }
    return value;
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
}
|
|
}
|