Scan identifiers

2019-06-06 22:13:42 +02:00 · 2019-06-06 22:13:42 +02:00 · c297b27f60
parent cafaafe2eb
commit c297b27f60
4 changed files with 87 additions and 9 deletions
--- a/moor_generator/lib/src/sql/parser/tokenizer/scanner.dart
+++ b/moor_generator/lib/src/sql/parser/tokenizer/scanner.dart
@ -84,12 +84,16 @@ class Scanner {
        if (_match("'")) {
          _string(binary: false);
        } else {
-          // todo probably an identifier if it doesn't start a string literal?
+          _identifier();
        }
        break;
      case "'":
        _string();
        break;
      case '"':
        // todo sqlite also allows string literals with double ticks, we don't
        _identifier(escapedInQuotes: true);
        break;
      case ' ':
      case '\t':
      case '\n':
@ -99,6 +103,8 @@ class Scanner {
      default:
        if (isDigit(char)) {
          _numeric(char);
        } else if (canStartColumnName(char)) {
          _identifier();
        }
        errors.add(TokenizerError(
            'Unexpected character.', SourceLocation(_currentOffset)));
@ -118,7 +124,9 @@ class Scanner {
  /// Conditionally consumes one character.
  ///
  /// Returns `false` without advancing when the scanner is at the end of the
  /// input or when the single character at [_currentOffset] differs from
  /// [expected]. Otherwise advances [_currentOffset] by one and returns `true`.
  bool _match(String expected) {
    if (_isAtEnd) return false;
-    if (source.substring(_currentOffset, 1) != expected) return false;
+    if (source.substring(_currentOffset, _currentOffset + 1) != expected) {
      return false;
    }
    // The character matched, so step over it.
    _currentOffset++;
    return true;
  }
@ -150,14 +158,14 @@ class Scanner {
    // We basically have three cases: hexadecimal numbers (starting with 0x),
    // numbers starting with a decimal dot and numbers starting with a digit.
    if (firstChar == '0') {
-      if (!_isAtEnd && _peek() == 'x') {
+      if (!_isAtEnd && (_peek() == 'x' || _peek() == 'X')) {
        _nextChar(); // consume the x
        // advance hexadecimal digits
-        while (isDigit(_peek()) && _isAtEnd) {
+        while (!_isAtEnd && isHexDigit(_peek())) {
          _nextChar();
          _addToken(TokenType.numberLiteral);
          return;
        }
        _addToken(TokenType.numberLiteral);
        return;
      }
    }
@ -207,7 +215,7 @@ class Scanner {
    // ok, we've read the first part of the number. But there's more! If it's
    // not a hexadecimal number, it could be in scientific notation.
-    if (!_isAtEnd && _peek() == 'e' || _peek() == 'E') {
+    if (!_isAtEnd && (_peek() == 'e' || _peek() == 'E')) {
      _nextChar(); // consume e or E
      if (_isAtEnd) {
@ -232,6 +240,33 @@ class Scanner {
              .add(TokenizerError('Expected plus or minus', _currentLocation));
        }
      }
    } else {
      // ok, no scientific notation
      _addToken(TokenType.numberLiteral);
    }
  }
  /// Scans an identifier and, on success, appends an [IdentifierToken] that
  /// covers [_currentSpan] to [tokens].
  ///
  /// With [escapedInQuotes] set, the identifier is expected to be written in
  /// double quotes and the scanner must already be positioned after the
  /// opening quote: everything up to and including the closing `"` is
  /// consumed. If the input ends before a closing quote is found, a
  /// [TokenizerError] is recorded in [errors] and no token is emitted.
  ///
  /// Without [escapedInQuotes], characters are consumed for as long as
  /// [continuesColumnName] accepts them.
  void _identifier({bool escapedInQuotes = false}) {
    if (escapedInQuotes) {
      // Find the closing quote. The end-of-input check has to come first so
      // that `_peek` is never asked for a character past the end of the
      // source; this mirrors the loop in the unquoted branch below.
      while (!_isAtEnd && _peek() != '"') {
        _nextChar();
      }
      // Issue an error if the column name is unterminated
      if (_isAtEnd) {
        errors
            .add(TokenizerError('Unterminated column name', _currentLocation));
      } else {
        // consume the closing double quote
        _nextChar();
        tokens.add(IdentifierToken(true, _currentSpan));
      }
    } else {
      while (!_isAtEnd && continuesColumnName(_peek())) {
        _nextChar();
      }
      tokens.add(IdentifierToken(false, _currentSpan));
    }
  }
 }
--- a/moor_generator/lib/src/sql/parser/tokenizer/token.dart
+++ b/moor_generator/lib/src/sql/parser/tokenizer/token.dart
@ -17,6 +17,7 @@ enum TokenType {
  stringLiteral,
  numberLiteral,
  identifier,
  eof,
 }
@ -39,6 +40,15 @@ class StringLiteral extends Token {
      : super(TokenType.stringLiteral, span);
 }
 /// A [Token] of type [TokenType.identifier].
 class IdentifierToken extends Token {
  /// Whether this identifier was written in "double quotes".
  ///
  /// In SQL, identifiers can be put in double quotes, in which case they are
  /// always interpreted as a column name.
  final bool escapedColumnName;
  /// Creates an identifier token covering [span].
  const IdentifierToken(this.escapedColumnName, SourceSpan span)
      : super(TokenType.identifier, span);
 }
 class TokenizerError {
  final String message;
  final SourceLocation location;
--- a/moor_generator/lib/src/sql/parser/tokenizer/utils.dart
+++ b/moor_generator/lib/src/sql/parser/tokenizer/utils.dart
@ -4,6 +4,8 @@ const _charCodeLowerA = 97; // 'a'.codeUnitAt(0);
 const _charCodeLowerF = 102; // 'f'.codeUnitAt(0);
 const _charCodeA = 65; // 'A'.codeUnitAt(0);
 // Upper bound of the 'A'..'F' range checked by isHexDigit. This must be 70;
 // 79 is the code unit of 'O' and would let 'G'..'O' pass as hex digits.
 const _charCodeF = 70; // 'F'.codeUnitAt(0);
 const _charCodeZ = 90; // 'Z'.codeUnitAt(0);
 const _charCodeLowerZ = 122; // 'z'.codeUnitAt(0);
 bool isDigit(String char) {
  final code = char.codeUnitAt(0);
@ -14,5 +16,17 @@ bool isHexDigit(String char) {
  final code = char.codeUnitAt(0);
  return (_charCodeLowerA <= code && code <= _charCodeLowerF) ||
-      (_charCodeA <= code && code <= _charCodeF);
+      (_charCodeA <= code && code <= _charCodeF) ||
      (_charCodeZero <= code && code <= _charCodeNine);
 }
 /// Whether [char] may be the first character of an unquoted identifier.
 ///
 /// Accepts the underscore and the ASCII letters `a`-`z` and `A`-`Z`. Only the
 /// first code unit of [char] is inspected for the letter ranges.
 bool canStartColumnName(String char) {
  final unit = char.codeUnitAt(0);
  if (char == '_') return true;

  final isLowerCaseLetter = unit >= _charCodeLowerA && unit <= _charCodeLowerZ;
  if (isLowerCaseLetter) return true;

  return unit >= _charCodeA && unit <= _charCodeZ;
 }
 /// Whether [char] may appear after the first character of an unquoted
 /// identifier: anything [canStartColumnName] accepts, plus digits.
 bool continuesColumnName(String char) {
  if (canStartColumnName(char)) {
    return true;
  }
  return isDigit(char);
 }
--- a/moor_generator/test/sql/scanner/single_token_tests.dart
+++ b/moor_generator/test/sql/scanner/single_token_tests.dart
@ -15,15 +15,34 @@ void expectFullToken(String token, TokenType type) {
  if (tokens.length != 2 || tokens.last.type != TokenType.eof) {
    fail(
-        'Expected exactly one token when parsing $token, got ${tokens.length}');
+        'Expected exactly one token when parsing $token, got ${tokens.length - 1}');
  }
  expect(tokens.first.type, type, reason: '$token is a $type');
  expect(tokens.first.span.text, token);
 }
 /// Table of single-token inputs, keyed by source text, with the [TokenType]
 /// the scanner is expected to produce for each one.
 ///
 /// NOTE(review): presumably each entry is passed to `expectFullToken` from
 /// `main` - confirm against the rest of the file.
 Map<String, TokenType> testCases = {
  '(': TokenType.leftParen,
  ')': TokenType.rightParen,
  ',': TokenType.comma,
  '.': TokenType.dot,
  '+': TokenType.plus,
  '-': TokenType.minus,
  '*': TokenType.star,
  '/': TokenType.slash,
  '<=': TokenType.lessEqual,
  '<': TokenType.less,
  '>=': TokenType.moreEqual,
  '>': TokenType.more,
  "'hello there'": TokenType.stringLiteral,
  // Number literals: decimal, scientific notation, leading dot, hexadecimal.
  '1.123': TokenType.numberLiteral,
  '1.32e5': TokenType.numberLiteral,
  '.123e-3': TokenType.numberLiteral,
  '0xFF13': TokenType.numberLiteral,
  '0Xf13A': TokenType.numberLiteral,
  // Identifiers: bare and double-quoted.
  'SELECT': TokenType.identifier,
  '"UPDATE"': TokenType.identifier,
 };
 void main() {