Scan identifiers

This commit is contained in:
Simon Binder 2019-06-06 22:13:42 +02:00
parent cafaafe2eb
commit c297b27f60
No known key found for this signature in database
GPG Key ID: 7891917E4147B8C0
4 changed files with 87 additions and 9 deletions

View File

@ -84,12 +84,16 @@ class Scanner {
if (_match("'")) { if (_match("'")) {
_string(binary: false); _string(binary: false);
} else { } else {
// todo probably an identifier if it doesn't start a string literal? _identifier();
} }
break; break;
case "'": case "'":
_string(); _string();
break; break;
case '"':
// todo sqlite also allows string literals with double ticks, we don't
_identifier(escapedInQuotes: true);
break;
case ' ': case ' ':
case '\t': case '\t':
case '\n': case '\n':
@ -99,6 +103,8 @@ class Scanner {
default: default:
if (isDigit(char)) { if (isDigit(char)) {
_numeric(char); _numeric(char);
} else if (canStartColumnName(char)) {
_identifier();
} }
errors.add(TokenizerError( errors.add(TokenizerError(
'Unexpected character.', SourceLocation(_currentOffset))); 'Unexpected character.', SourceLocation(_currentOffset)));
@ -118,7 +124,9 @@ class Scanner {
bool _match(String expected) { bool _match(String expected) {
if (_isAtEnd) return false; if (_isAtEnd) return false;
if (source.substring(_currentOffset, 1) != expected) return false; if (source.substring(_currentOffset, _currentOffset + 1) != expected) {
return false;
}
_currentOffset++; _currentOffset++;
return true; return true;
} }
@ -150,14 +158,14 @@ class Scanner {
// We basically have three cases: hexadecimal numbers (starting with 0x), // We basically have three cases: hexadecimal numbers (starting with 0x),
// numbers starting with a decimal dot and numbers starting with a digit. // numbers starting with a decimal dot and numbers starting with a digit.
if (firstChar == '0') { if (firstChar == '0') {
if (!_isAtEnd && _peek() == 'x') { if (!_isAtEnd && (_peek() == 'x' || _peek() == 'X')) {
_nextChar(); // consume the x _nextChar(); // consume the x
// advance hexadecimal digits // advance hexadecimal digits
while (isDigit(_peek()) && _isAtEnd) { while (!_isAtEnd && isHexDigit(_peek())) {
_nextChar(); _nextChar();
_addToken(TokenType.numberLiteral);
return;
} }
_addToken(TokenType.numberLiteral);
return;
} }
} }
@ -207,7 +215,7 @@ class Scanner {
// ok, we've read the first part of the number. But there's more! If it's // ok, we've read the first part of the number. But there's more! If it's
// not a hexadecimal number, it could be in scientific notation. // not a hexadecimal number, it could be in scientific notation.
if (!_isAtEnd && _peek() == 'e' || _peek() == 'E') { if (!_isAtEnd && (_peek() == 'e' || _peek() == 'E')) {
_nextChar(); // consume e or E _nextChar(); // consume e or E
if (_isAtEnd) { if (_isAtEnd) {
@ -232,6 +240,33 @@ class Scanner {
.add(TokenizerError('Expected plus or minus', _currentLocation)); .add(TokenizerError('Expected plus or minus', _currentLocation));
} }
} }
} else {
// ok, no scientific notation
_addToken(TokenType.numberLiteral);
}
}
/// Scans an identifier and appends an [IdentifierToken] to [tokens].
///
/// When [escapedInQuotes] is true, everything up to the next `"` is consumed
/// as a quoted identifier. If the input ends before that quote is found, an
/// 'Unterminated column name' error is added to [errors] and no token is
/// emitted. When it is false, characters are consumed for as long as
/// [continuesColumnName] accepts them.
void _identifier({bool escapedInQuotes = false}) {
  if (escapedInQuotes) {
    // Find the closing quote. `_isAtEnd` is tested before `_peek()` so that
    // we never peek past the end of the source, matching the other loops in
    // this scanner.
    while (!_isAtEnd && _peek() != '"') {
      _nextChar();
    }

    // Issue an error if the column name is unterminated.
    if (_isAtEnd) {
      errors
          .add(TokenizerError('Unterminated column name', _currentLocation));
    } else {
      // Consume the closing double quote so it is part of the token's span.
      _nextChar();
      tokens.add(IdentifierToken(true, _currentSpan));
    }
  } else {
    while (!_isAtEnd && continuesColumnName(_peek())) {
      _nextChar();
    }
    tokens.add(IdentifierToken(false, _currentSpan));
  }
}
} }

View File

@ -17,6 +17,7 @@ enum TokenType {
stringLiteral, stringLiteral,
numberLiteral, numberLiteral,
identifier,
eof, eof,
} }
@ -39,6 +40,15 @@ class StringLiteral extends Token {
: super(TokenType.stringLiteral, span); : super(TokenType.stringLiteral, span);
} }
/// A [Token] of type [TokenType.identifier].
class IdentifierToken extends Token {
  const IdentifierToken(this.escapedColumnName, SourceSpan span)
      : super(TokenType.identifier, span);

  /// Whether this identifier was written in "double quotes".
  ///
  /// In sql, a quoted identifier is always interpreted as a column name.
  final bool escapedColumnName;
}
class TokenizerError { class TokenizerError {
final String message; final String message;
final SourceLocation location; final SourceLocation location;

View File

@ -4,6 +4,8 @@ const _charCodeLowerA = 97; // 'a'.codeUnitAt(0);
const _charCodeLowerF = 102; // 'f'.codeUnitAt(0); const _charCodeLowerF = 102; // 'f'.codeUnitAt(0);
const _charCodeA = 65; // 'A'.codeUnitAt(0); const _charCodeA = 65; // 'A'.codeUnitAt(0);
// Upper bound of the 'A'..'F' range that isHexDigit checks.
const _charCodeF = 70; // 'F'.codeUnitAt(0);
const _charCodeZ = 90; // 'Z'.codeUnitAt(0);
const _charCodeLowerZ = 122; // 'z'.codeUnitAt(0);
bool isDigit(String char) { bool isDigit(String char) {
final code = char.codeUnitAt(0); final code = char.codeUnitAt(0);
@ -14,5 +16,17 @@ bool isHexDigit(String char) {
final code = char.codeUnitAt(0); final code = char.codeUnitAt(0);
return (_charCodeLowerA <= code && code <= _charCodeLowerF) || return (_charCodeLowerA <= code && code <= _charCodeLowerF) ||
(_charCodeA <= code && code <= _charCodeF); (_charCodeA <= code && code <= _charCodeF) ||
(_charCodeZero <= code && code <= _charCodeNine);
}
/// Whether [char] may begin an unquoted column name.
///
/// That is the case for an underscore and for the ASCII letters `a`-`z` and
/// `A`-`Z`. The letter check only looks at the first code unit of [char].
bool canStartColumnName(String char) {
  final unit = char.codeUnitAt(0);
  if (char == '_') return true;

  final isLowerCase = unit >= _charCodeLowerA && unit <= _charCodeLowerZ;
  final isUpperCase = unit >= _charCodeA && unit <= _charCodeZ;
  return isLowerCase || isUpperCase;
}
/// Whether [char] may follow the first character of an unquoted column name:
/// anything [canStartColumnName] accepts, or a digit as decided by [isDigit].
bool continuesColumnName(String char) =>
    canStartColumnName(char) || isDigit(char);

View File

@ -15,15 +15,34 @@ void expectFullToken(String token, TokenType type) {
if (tokens.length != 2 || tokens.last.type != TokenType.eof) { if (tokens.length != 2 || tokens.last.type != TokenType.eof) {
fail( fail(
'Expected exactly one token when parsing $token, got ${tokens.length}'); 'Expected exactly one token when parsing $token, got ${tokens.length - 1}');
} }
expect(tokens.first.type, type, reason: '$token is a $type'); expect(tokens.first.type, type, reason: '$token is a $type');
expect(tokens.first.span.text, token);
} }
Map<String, TokenType> testCases = { Map<String, TokenType> testCases = {
'(': TokenType.leftParen,
')': TokenType.rightParen,
',': TokenType.comma,
'.': TokenType.dot, '.': TokenType.dot,
'+': TokenType.plus,
'-': TokenType.minus,
'*': TokenType.star,
'/': TokenType.slash,
'<=': TokenType.lessEqual,
'<': TokenType.less,
'>=': TokenType.moreEqual,
'>': TokenType.more,
"'hello there'": TokenType.stringLiteral, "'hello there'": TokenType.stringLiteral,
'1.123': TokenType.numberLiteral,
'1.32e5': TokenType.numberLiteral,
'.123e-3': TokenType.numberLiteral,
'0xFF13': TokenType.numberLiteral,
'0Xf13A': TokenType.numberLiteral,
'SELECT': TokenType.identifier,
'"UPDATE"': TokenType.identifier,
}; };
void main() { void main() {