mirror of https://github.com/AMT-Cheif/drift.git
Scan identifiers
This commit is contained in:
parent
cafaafe2eb
commit
c297b27f60
|
@ -84,12 +84,16 @@ class Scanner {
|
|||
if (_match("'")) {
|
||||
_string(binary: false);
|
||||
} else {
|
||||
// todo probably an identifier if it doesn't start a string literal?
|
||||
_identifier();
|
||||
}
|
||||
break;
|
||||
case "'":
|
||||
_string();
|
||||
break;
|
||||
case '"':
|
||||
// todo sqlite also allows string literals with double ticks, we don't
|
||||
_identifier(escapedInQuotes: true);
|
||||
break;
|
||||
case ' ':
|
||||
case '\t':
|
||||
case '\n':
|
||||
|
@ -99,6 +103,8 @@ class Scanner {
|
|||
default:
|
||||
if (isDigit(char)) {
|
||||
_numeric(char);
|
||||
} else if (canStartColumnName(char)) {
|
||||
_identifier();
|
||||
}
|
||||
errors.add(TokenizerError(
|
||||
'Unexpected character.', SourceLocation(_currentOffset)));
|
||||
|
@ -118,7 +124,9 @@ class Scanner {
|
|||
|
||||
/// Consumes the next character if it equals [expected].
///
/// Returns `true` and advances [_currentOffset] by one when the character at
/// the current offset equals [expected]. Returns `false` without advancing
/// when the end of the input was reached or the character differs.
bool _match(String expected) {
  if (_isAtEnd) return false;

  // String.substring takes an exclusive end index, not a length, so the
  // one-character slice at the current position ends at _currentOffset + 1.
  if (source.substring(_currentOffset, _currentOffset + 1) != expected) {
    return false;
  }

  _currentOffset++;
  return true;
}
|
||||
|
@ -150,16 +158,16 @@ class Scanner {
|
|||
// We basically have three cases: hexadecimal numbers (starting with 0x),
|
||||
// numbers starting with a decimal dot and numbers starting with a digit.
|
||||
if (firstChar == '0') {
|
||||
if (!_isAtEnd && _peek() == 'x') {
|
||||
if (!_isAtEnd && (_peek() == 'x' || _peek() == 'X')) {
|
||||
_nextChar(); // consume the x
|
||||
// advance hexadecimal digits
|
||||
while (isDigit(_peek()) && _isAtEnd) {
|
||||
while (!_isAtEnd && isHexDigit(_peek())) {
|
||||
_nextChar();
|
||||
}
|
||||
_addToken(TokenType.numberLiteral);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void consumeDigits() {
|
||||
while (!_isAtEnd && isDigit(_peek())) {
|
||||
|
@ -207,7 +215,7 @@ class Scanner {
|
|||
|
||||
// ok, we've read the first part of the number. But there's more! If it's
|
||||
// not a hexadecimal number, it could be in scientific notation.
|
||||
if (!_isAtEnd && _peek() == 'e' || _peek() == 'E') {
|
||||
if (!_isAtEnd && (_peek() == 'e' || _peek() == 'E')) {
|
||||
_nextChar(); // consume e or E
|
||||
|
||||
if (_isAtEnd) {
|
||||
|
@ -232,6 +240,33 @@ class Scanner {
|
|||
.add(TokenizerError('Expected plus or minus', _currentLocation));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// ok, no scientific notation
|
||||
_addToken(TokenType.numberLiteral);
|
||||
}
|
||||
}
|
||||
|
||||
/// Scans an identifier and adds an [IdentifierToken] for it to [tokens].
///
/// When [escapedInQuotes] is true, characters are consumed up to and
/// including the next double quote and the token is marked as an escaped
/// column name. If the input ends before a closing quote is found, an
/// 'Unterminated column name' error is added to [errors] and no token is
/// emitted. (Presumably the opening quote has already been consumed by the
/// caller - confirm against the scan loop.)
///
/// When [escapedInQuotes] is false, characters are consumed for as long as
/// [continuesColumnName] accepts them, and a non-escaped token is added.
void _identifier({bool escapedInQuotes = false}) {
  if (escapedInQuotes) {
    // Find the closing quote. _isAtEnd is checked before peeking, like the
    // other scanning loops in this class do, so that an unterminated name
    // falls through to the error below instead of peeking past the input.
    while (!_isAtEnd && _peek() != '"') {
      _nextChar();
    }

    // Issue an error if the column name is unterminated
    if (_isAtEnd) {
      errors
          .add(TokenizerError('Unterminated column name', _currentLocation));
    } else {
      // consume the closing double quote
      _nextChar();
      tokens.add(IdentifierToken(true, _currentSpan));
    }
  } else {
    while (!_isAtEnd && continuesColumnName(_peek())) {
      _nextChar();
    }

    tokens.add(IdentifierToken(false, _currentSpan));
  }
}
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@ enum TokenType {
|
|||
|
||||
stringLiteral,
|
||||
numberLiteral,
|
||||
identifier,
|
||||
|
||||
eof,
|
||||
}
|
||||
|
@ -39,6 +40,15 @@ class StringLiteral extends Token {
|
|||
: super(TokenType.stringLiteral, span);
|
||||
}
|
||||
|
||||
/// A [Token] of type [TokenType.identifier].
///
/// The scanner creates it both for plain identifiers and for names written in
/// double quotes; [escapedColumnName] tells the two apart.
class IdentifierToken extends Token {
|
||||
/// In sql, identifiers can be put in "double quotes", in which case they are
|
||||
/// always interpreted as a column name.
|
||||
final bool escapedColumnName;
|
||||
|
||||
const IdentifierToken(this.escapedColumnName, SourceSpan span)
|
||||
: super(TokenType.identifier, span);
|
||||
}
|
||||
|
||||
class TokenizerError {
|
||||
final String message;
|
||||
final SourceLocation location;
|
||||
|
|
|
@ -4,6 +4,8 @@ const _charCodeLowerA = 97; // 'a'.codeUnitAt(0);
|
|||
const _charCodeLowerF = 102; // 'f'.codeUnitAt(0);
|
||||
const _charCodeA = 65; // 'A'.codeUnitAt(0);
|
||||
const _charCodeF = 70; // 'F'.codeUnitAt(0); (79 would be 'O')
|
||||
const _charCodeZ = 90; // 'Z'.codeUnitAt(0);
|
||||
const _charCodeLowerZ = 122; // 'z'.codeUnitAt(0);
|
||||
|
||||
bool isDigit(String char) {
|
||||
final code = char.codeUnitAt(0);
|
||||
|
@ -14,5 +16,17 @@ bool isHexDigit(String char) {
|
|||
final code = char.codeUnitAt(0);
|
||||
|
||||
return (_charCodeLowerA <= code && code <= _charCodeLowerF) ||
|
||||
(_charCodeA <= code && code <= _charCodeF);
|
||||
(_charCodeA <= code && code <= _charCodeF) ||
|
||||
(_charCodeZero <= code && code <= _charCodeNine);
|
||||
}
|
||||
|
||||
/// Whether [char] can be the first character of an unquoted identifier.
///
/// Accepts the string `_`, or any string whose first code unit is an ASCII
/// letter (`a`-`z` or `A`-`Z`). Only the first code unit is inspected for the
/// letter check, so [char] is expected to be a single character.
bool canStartColumnName(String char) {
|
||||
final code = char.codeUnitAt(0);
|
||||
return char == '_' ||
|
||||
(_charCodeLowerA <= code && code <= _charCodeLowerZ) ||
|
||||
(_charCodeA <= code && code <= _charCodeZ);
|
||||
}
|
||||
|
||||
/// Whether [char] can appear after the first character of an unquoted
/// identifier: anything [canStartColumnName] accepts, plus whatever [isDigit]
/// accepts.
bool continuesColumnName(String char) {
|
||||
return canStartColumnName(char) || isDigit(char);
|
||||
}
|
||||
|
|
|
@ -15,15 +15,34 @@ void expectFullToken(String token, TokenType type) {
|
|||
|
||||
if (tokens.length != 2 || tokens.last.type != TokenType.eof) {
|
||||
fail(
|
||||
'Expected exactly one token when parsing $token, got ${tokens.length}');
|
||||
'Expected exactly one token when parsing $token, got ${tokens.length - 1}');
|
||||
}
|
||||
|
||||
expect(tokens.first.type, type, reason: '$token is a $type');
|
||||
expect(tokens.first.span.text, token);
|
||||
}
|
||||
|
||||
Map<String, TokenType> testCases = {
|
||||
'(': TokenType.leftParen,
|
||||
')': TokenType.rightParen,
|
||||
',': TokenType.comma,
|
||||
'.': TokenType.dot,
|
||||
'+': TokenType.plus,
|
||||
'-': TokenType.minus,
|
||||
'*': TokenType.star,
|
||||
'/': TokenType.slash,
|
||||
'<=': TokenType.lessEqual,
|
||||
'<': TokenType.less,
|
||||
'>=': TokenType.moreEqual,
|
||||
'>': TokenType.more,
|
||||
"'hello there'": TokenType.stringLiteral,
|
||||
'1.123': TokenType.numberLiteral,
|
||||
'1.32e5': TokenType.numberLiteral,
|
||||
'.123e-3': TokenType.numberLiteral,
|
||||
'0xFF13': TokenType.numberLiteral,
|
||||
'0Xf13A': TokenType.numberLiteral,
|
||||
'SELECT': TokenType.identifier,
|
||||
'"UPDATE"': TokenType.identifier,
|
||||
};
|
||||
|
||||
void main() {
|
||||
|
|
Loading…
Reference in New Issue