Scan identifiers

This commit is contained in:
Simon Binder 2019-06-06 22:13:42 +02:00
parent cafaafe2eb
commit c297b27f60
No known key found for this signature in database
GPG Key ID: 7891917E4147B8C0
4 changed files with 87 additions and 9 deletions

View File

@ -84,12 +84,16 @@ class Scanner {
if (_match("'")) {
_string(binary: false);
} else {
// todo probably an identifier if it doesn't start a string literal?
_identifier();
}
break;
case "'":
_string();
break;
case '"':
// todo sqlite also allows string literals with double ticks, we don't
_identifier(escapedInQuotes: true);
break;
case ' ':
case '\t':
case '\n':
@ -99,6 +103,8 @@ class Scanner {
default:
if (isDigit(char)) {
_numeric(char);
} else if (canStartColumnName(char)) {
_identifier();
}
errors.add(TokenizerError(
'Unexpected character.', SourceLocation(_currentOffset)));
@ -118,7 +124,9 @@ class Scanner {
/// Consumes the next character if it equals [expected].
///
/// Returns `true` and advances `_currentOffset` by one on a match. Returns
/// `false` without consuming anything when `_isAtEnd` is set or the next
/// character differs from [expected].
bool _match(String expected) {
  if (_isAtEnd) return false;

  // String.substring takes an exclusive end *index*, not a length, so the
  // one-character window must be expressed relative to the current offset.
  if (source.substring(_currentOffset, _currentOffset + 1) != expected) {
    return false;
  }

  _currentOffset++;
  return true;
}
@ -150,14 +158,14 @@ class Scanner {
// We basically have three cases: hexadecimal numbers (starting with 0x),
// numbers starting with a decimal dot and numbers starting with a digit.
if (firstChar == '0') {
if (!_isAtEnd && _peek() == 'x') {
if (!_isAtEnd && (_peek() == 'x' || _peek() == 'X')) {
_nextChar(); // consume the x
// advance hexadecimal digits
while (isDigit(_peek()) && _isAtEnd) {
while (!_isAtEnd && isHexDigit(_peek())) {
_nextChar();
_addToken(TokenType.numberLiteral);
return;
}
_addToken(TokenType.numberLiteral);
return;
}
}
@ -207,7 +215,7 @@ class Scanner {
// ok, we've read the first part of the number. But there's more! If it's
// not a hexadecimal number, it could be in scientific notation.
if (!_isAtEnd && _peek() == 'e' || _peek() == 'E') {
if (!_isAtEnd && (_peek() == 'e' || _peek() == 'E')) {
_nextChar(); // consume e or E
if (_isAtEnd) {
@ -232,6 +240,33 @@ class Scanner {
.add(TokenizerError('Expected plus or minus', _currentLocation));
}
}
} else {
// ok, no scientific notation
_addToken(TokenType.numberLiteral);
}
}
/// Scans an identifier and, on success, appends an [IdentifierToken] to
/// `tokens`.
///
/// Without [escapedInQuotes], characters are consumed for as long as
/// [continuesColumnName] accepts them and a non-escaped token is emitted.
///
/// With [escapedInQuotes], everything up to and including the next `"` is
/// consumed and an escaped token is emitted. If the input ends before a
/// closing quote is found, a [TokenizerError] is recorded in `errors` and no
/// token is emitted.
// NOTE(review): presumably the opening quote / first character was already
// consumed by the caller before this method runs - confirm against the
// scanner's main switch.
void _identifier({bool escapedInQuotes = false}) {
  if (!escapedInQuotes) {
    while (!_isAtEnd && continuesColumnName(_peek())) {
      _nextChar();
    }
    tokens.add(IdentifierToken(false, _currentSpan));
    return;
  }

  // Test for the end of input *before* peeking, matching the other scanning
  // loops, so that `_peek()` is never invoked past the last character.
  while (!_isAtEnd && _peek() != '"') {
    _nextChar();
  }

  if (_isAtEnd) {
    // The column name was never terminated.
    errors.add(TokenizerError('Unterminated column name', _currentLocation));
    return;
  }

  _nextChar(); // consume the closing double quote
  tokens.add(IdentifierToken(true, _currentSpan));
}
}

View File

@ -17,6 +17,7 @@ enum TokenType {
stringLiteral,
numberLiteral,
identifier,
eof,
}
@ -39,6 +40,15 @@ class StringLiteral extends Token {
: super(TokenType.stringLiteral, span);
}
/// A [Token] of type [TokenType.identifier].
class IdentifierToken extends Token {
  const IdentifierToken(this.escapedColumnName, SourceSpan span)
      : super(TokenType.identifier, span);

  /// Whether the identifier was written in "double quotes".
  ///
  /// In sql, a quoted identifier is always interpreted as a column name.
  final bool escapedColumnName;
}
class TokenizerError {
final String message;
final SourceLocation location;

View File

@ -4,6 +4,8 @@ const _charCodeLowerA = 97; // 'a'.codeUnitAt(0);
const _charCodeLowerF = 102; // 'f'.codeUnitAt(0);
const _charCodeA = 65; // 'A'.codeUnitAt(0);
// Upper bound of the upper-case hexadecimal digit range. 'F' is code unit 70;
// a larger value would let letters beyond 'F' pass as hex digits.
const _charCodeF = 70; // 'F'.codeUnitAt(0);
const _charCodeZ = 90; // 'Z'.codeUnitAt(0);
const _charCodeLowerZ = 122; // 'z'.codeUnitAt(0);
bool isDigit(String char) {
final code = char.codeUnitAt(0);
@ -14,5 +16,17 @@ bool isHexDigit(String char) {
final code = char.codeUnitAt(0);
return (_charCodeLowerA <= code && code <= _charCodeLowerF) ||
(_charCodeA <= code && code <= _charCodeF);
(_charCodeA <= code && code <= _charCodeF) ||
(_charCodeZero <= code && code <= _charCodeNine);
}
/// Whether [char] may be the first character of an unquoted identifier.
///
/// Accepts the string `_` and any string whose first code unit is an ASCII
/// letter (`a`-`z` or `A`-`Z`).
bool canStartColumnName(String char) {
  if (char == '_') return true;

  final unit = char.codeUnitAt(0);
  final isLowerCase = _charCodeLowerA <= unit && unit <= _charCodeLowerZ;
  final isUpperCase = _charCodeA <= unit && unit <= _charCodeZ;
  return isLowerCase || isUpperCase;
}
/// Whether [char] may follow the first character of an unquoted identifier.
///
/// That is everything [canStartColumnName] accepts, plus whatever [isDigit]
/// accepts.
bool continuesColumnName(String char) =>
    canStartColumnName(char) || isDigit(char);

View File

@ -15,15 +15,34 @@ void expectFullToken(String token, TokenType type) {
if (tokens.length != 2 || tokens.last.type != TokenType.eof) {
fail(
'Expected exactly one token when parsing $token, got ${tokens.length}');
'Expected exactly one token when parsing $token, got ${tokens.length - 1}');
}
expect(tokens.first.type, type, reason: '$token is a $type');
expect(tokens.first.span.text, token);
}
/// Sql snippets paired with the [TokenType] each one is expected to have.
///
/// Declared `final` so the table cannot be reassigned by accident.
// NOTE(review): presumably each entry is fed to expectFullToken from main() -
// confirm against the test body.
final Map<String, TokenType> testCases = {
  '(': TokenType.leftParen,
  ')': TokenType.rightParen,
  ',': TokenType.comma,
  '.': TokenType.dot,
  '+': TokenType.plus,
  '-': TokenType.minus,
  '*': TokenType.star,
  '/': TokenType.slash,
  '<=': TokenType.lessEqual,
  '<': TokenType.less,
  '>=': TokenType.moreEqual,
  '>': TokenType.more,
  "'hello there'": TokenType.stringLiteral,
  '1.123': TokenType.numberLiteral,
  '1.32e5': TokenType.numberLiteral,
  '.123e-3': TokenType.numberLiteral,
  '0xFF13': TokenType.numberLiteral,
  '0Xf13A': TokenType.numberLiteral,
  'SELECT': TokenType.identifier,
  '"UPDATE"': TokenType.identifier,
};
void main() {