mirror of https://github.com/AMT-Cheif/drift.git
Scan identifiers
This commit is contained in:
parent
cafaafe2eb
commit
c297b27f60
|
@ -84,12 +84,16 @@ class Scanner {
|
||||||
if (_match("'")) {
|
if (_match("'")) {
|
||||||
_string(binary: false);
|
_string(binary: false);
|
||||||
} else {
|
} else {
|
||||||
// todo probably an identifier if it doesn't start a string literal?
|
_identifier();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case "'":
|
case "'":
|
||||||
_string();
|
_string();
|
||||||
break;
|
break;
|
||||||
|
case '"':
|
||||||
|
// todo sqlite also allows string literals with double ticks, we don't
|
||||||
|
_identifier(escapedInQuotes: true);
|
||||||
|
break;
|
||||||
case ' ':
|
case ' ':
|
||||||
case '\t':
|
case '\t':
|
||||||
case '\n':
|
case '\n':
|
||||||
|
@ -99,6 +103,8 @@ class Scanner {
|
||||||
default:
|
default:
|
||||||
if (isDigit(char)) {
|
if (isDigit(char)) {
|
||||||
_numeric(char);
|
_numeric(char);
|
||||||
|
} else if (canStartColumnName(char)) {
|
||||||
|
_identifier();
|
||||||
}
|
}
|
||||||
errors.add(TokenizerError(
|
errors.add(TokenizerError(
|
||||||
'Unexpected character.', SourceLocation(_currentOffset)));
|
'Unexpected character.', SourceLocation(_currentOffset)));
|
||||||
|
@ -118,7 +124,9 @@ class Scanner {
|
||||||
|
|
||||||
bool _match(String expected) {
|
bool _match(String expected) {
|
||||||
if (_isAtEnd) return false;
|
if (_isAtEnd) return false;
|
||||||
if (source.substring(_currentOffset, 1) != expected) return false;
|
if (source.substring(_currentOffset, _currentOffset + 1) != expected) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
_currentOffset++;
|
_currentOffset++;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -150,14 +158,14 @@ class Scanner {
|
||||||
// We basically have three cases: hexadecimal numbers (starting with 0x),
|
// We basically have three cases: hexadecimal numbers (starting with 0x),
|
||||||
// numbers starting with a decimal dot and numbers starting with a digit.
|
// numbers starting with a decimal dot and numbers starting with a digit.
|
||||||
if (firstChar == '0') {
|
if (firstChar == '0') {
|
||||||
if (!_isAtEnd && _peek() == 'x') {
|
if (!_isAtEnd && (_peek() == 'x' || _peek() == 'X')) {
|
||||||
_nextChar(); // consume the x
|
_nextChar(); // consume the x
|
||||||
// advance hexadecimal digits
|
// advance hexadecimal digits
|
||||||
while (isDigit(_peek()) && _isAtEnd) {
|
while (!_isAtEnd && isHexDigit(_peek())) {
|
||||||
_nextChar();
|
_nextChar();
|
||||||
_addToken(TokenType.numberLiteral);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
_addToken(TokenType.numberLiteral);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -207,7 +215,7 @@ class Scanner {
|
||||||
|
|
||||||
// ok, we've read the first part of the number. But there's more! If it's
|
// ok, we've read the first part of the number. But there's more! If it's
|
||||||
// not a hexadecimal number, it could be in scientific notation.
|
// not a hexadecimal number, it could be in scientific notation.
|
||||||
if (!_isAtEnd && _peek() == 'e' || _peek() == 'E') {
|
if (!_isAtEnd && (_peek() == 'e' || _peek() == 'E')) {
|
||||||
_nextChar(); // consume e or E
|
_nextChar(); // consume e or E
|
||||||
|
|
||||||
if (_isAtEnd) {
|
if (_isAtEnd) {
|
||||||
|
@ -232,6 +240,33 @@ class Scanner {
|
||||||
.add(TokenizerError('Expected plus or minus', _currentLocation));
|
.add(TokenizerError('Expected plus or minus', _currentLocation));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// ok, no scientific notation
|
||||||
|
_addToken(TokenType.numberLiteral);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void _identifier({bool escapedInQuotes = false}) {
|
||||||
|
if (escapedInQuotes) {
|
||||||
|
// find the closing quote
|
||||||
|
while (_peek() != '"' && !_isAtEnd) {
|
||||||
|
_nextChar();
|
||||||
|
}
|
||||||
|
// Issue an error if the column name is unterminated
|
||||||
|
if (_isAtEnd) {
|
||||||
|
errors
|
||||||
|
.add(TokenizerError('Unterminated column name', _currentLocation));
|
||||||
|
} else {
|
||||||
|
// consume the closing double quote
|
||||||
|
_nextChar();
|
||||||
|
tokens.add(IdentifierToken(true, _currentSpan));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
while (!_isAtEnd && continuesColumnName(_peek())) {
|
||||||
|
_nextChar();
|
||||||
|
}
|
||||||
|
|
||||||
|
tokens.add(IdentifierToken(false, _currentSpan));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,6 +17,7 @@ enum TokenType {
|
||||||
|
|
||||||
stringLiteral,
|
stringLiteral,
|
||||||
numberLiteral,
|
numberLiteral,
|
||||||
|
identifier,
|
||||||
|
|
||||||
eof,
|
eof,
|
||||||
}
|
}
|
||||||
|
@ -39,6 +40,15 @@ class StringLiteral extends Token {
|
||||||
: super(TokenType.stringLiteral, span);
|
: super(TokenType.stringLiteral, span);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class IdentifierToken extends Token {
|
||||||
|
/// In SQL, identifiers can be put in "double quotes", in which case they are
|
||||||
|
/// always interpreted as a column name.
|
||||||
|
final bool escapedColumnName;
|
||||||
|
|
||||||
|
const IdentifierToken(this.escapedColumnName, SourceSpan span)
|
||||||
|
: super(TokenType.identifier, span);
|
||||||
|
}
|
||||||
|
|
||||||
class TokenizerError {
|
class TokenizerError {
|
||||||
final String message;
|
final String message;
|
||||||
final SourceLocation location;
|
final SourceLocation location;
|
||||||
|
|
|
@ -4,6 +4,8 @@ const _charCodeLowerA = 97; // 'a'.codeUnitAt(0);
|
||||||
const _charCodeLowerF = 102; // 'f'.codeUnitAt(0);
|
const _charCodeLowerF = 102; // 'f'.codeUnitAt(0);
|
||||||
const _charCodeA = 65; // 'A'.codeUnitAt(0);
|
const _charCodeA = 65; // 'A'.codeUnitAt(0);
|
||||||
const _charCodeF = 79; // 'F'.codeUnitAt(0);
|
const _charCodeF = 70; // 'F'.codeUnitAt(0);
|
||||||
|
const _charCodeZ = 90; // 'Z'.codeUnitAt(0);
|
||||||
|
const _charCodeLowerZ = 122; // 'z'.codeUnitAt(0);
|
||||||
|
|
||||||
bool isDigit(String char) {
|
bool isDigit(String char) {
|
||||||
final code = char.codeUnitAt(0);
|
final code = char.codeUnitAt(0);
|
||||||
|
@ -14,5 +16,17 @@ bool isHexDigit(String char) {
|
||||||
final code = char.codeUnitAt(0);
|
final code = char.codeUnitAt(0);
|
||||||
|
|
||||||
return (_charCodeLowerA <= code && code <= _charCodeLowerF) ||
|
return (_charCodeLowerA <= code && code <= _charCodeLowerF) ||
|
||||||
(_charCodeA <= code && code <= _charCodeF);
|
(_charCodeA <= code && code <= _charCodeF) ||
|
||||||
|
(_charCodeZero <= code && code <= _charCodeNine);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Whether [char] may begin an unquoted identifier.
///
/// Returns true when [char] is exactly `_`, or when its first code unit is an
/// ASCII letter (`a`-`z` or `A`-`Z`). Only the first code unit is inspected
/// for the letter check.
bool canStartColumnName(String char) {
  // Read the code unit up front so an empty string fails the same way for
  // every input, regardless of which branch would have matched.
  final unit = char.codeUnitAt(0);
  if (char == '_') return true;

  final isLowerCaseLetter = unit >= _charCodeLowerA && unit <= _charCodeLowerZ;
  if (isLowerCaseLetter) return true;

  return unit >= _charCodeA && unit <= _charCodeZ;
}
|
||||||
|
|
||||||
|
bool continuesColumnName(String char) {
|
||||||
|
return canStartColumnName(char) || isDigit(char);
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,15 +15,34 @@ void expectFullToken(String token, TokenType type) {
|
||||||
|
|
||||||
if (tokens.length != 2 || tokens.last.type != TokenType.eof) {
|
if (tokens.length != 2 || tokens.last.type != TokenType.eof) {
|
||||||
fail(
|
fail(
|
||||||
'Expected exactly one token when parsing $token, got ${tokens.length}');
|
'Expected exactly one token when parsing $token, got ${tokens.length - 1}');
|
||||||
}
|
}
|
||||||
|
|
||||||
expect(tokens.first.type, type, reason: '$token is a $type');
|
expect(tokens.first.type, type, reason: '$token is a $type');
|
||||||
|
expect(tokens.first.span.text, token);
|
||||||
}
|
}
|
||||||
|
|
||||||
Map<String, TokenType> testCases = {
|
Map<String, TokenType> testCases = {
|
||||||
|
'(': TokenType.leftParen,
|
||||||
|
')': TokenType.rightParen,
|
||||||
|
',': TokenType.comma,
|
||||||
'.': TokenType.dot,
|
'.': TokenType.dot,
|
||||||
|
'+': TokenType.plus,
|
||||||
|
'-': TokenType.minus,
|
||||||
|
'*': TokenType.star,
|
||||||
|
'/': TokenType.slash,
|
||||||
|
'<=': TokenType.lessEqual,
|
||||||
|
'<': TokenType.less,
|
||||||
|
'>=': TokenType.moreEqual,
|
||||||
|
'>': TokenType.more,
|
||||||
"'hello there'": TokenType.stringLiteral,
|
"'hello there'": TokenType.stringLiteral,
|
||||||
|
'1.123': TokenType.numberLiteral,
|
||||||
|
'1.32e5': TokenType.numberLiteral,
|
||||||
|
'.123e-3': TokenType.numberLiteral,
|
||||||
|
'0xFF13': TokenType.numberLiteral,
|
||||||
|
'0Xf13A': TokenType.numberLiteral,
|
||||||
|
'SELECT': TokenType.identifier,
|
||||||
|
'"UPDATE"': TokenType.identifier,
|
||||||
};
|
};
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
|
|
Loading…
Reference in New Issue