Scan identifiers

2019-06-06 22:13:42 +02:00 · 2019-06-06 22:13:42 +02:00 · c297b27f60
parent cafaafe2eb
commit c297b27f60
4 changed files with 87 additions and 9 deletions
--- a/moor_generator/lib/src/sql/parser/tokenizer/scanner.dart
+++ b/moor_generator/lib/src/sql/parser/tokenizer/scanner.dart
@ -84,12 +84,16 @@ class Scanner {
        if (_match("'")) {
          _string(binary: false);
        } else {
-          // todo probably an identifier if it doesn't start a string literal?
+          _identifier();
        }
        break;
      case "'":
        _string();
        break;
+      case '"':
+        // todo sqlite also allows string literals with double ticks, we don't
+        _identifier(escapedInQuotes: true);
+        break;
      case ' ':
      case '\t':
      case '\n':
@ -99,6 +103,8 @@ class Scanner {
      default:
        if (isDigit(char)) {
          _numeric(char);
+        } else if (canStartColumnName(char)) {
+          _identifier();
        }
        errors.add(TokenizerError(
            'Unexpected character.', SourceLocation(_currentOffset)));
@ -118,7 +124,9 @@ class Scanner {

  bool _match(String expected) {
    if (_isAtEnd) return false;
-    if (source.substring(_currentOffset, 1) != expected) return false;
+    if (source.substring(_currentOffset, _currentOffset + 1) != expected) {
+      return false;
+    }
    _currentOffset++;
    return true;
  }
@ -150,16 +158,16 @@ class Scanner {
    // We basically have three cases: hexadecimal numbers (starting with 0x),
    // numbers starting with a decimal dot and numbers starting with a digit.
    if (firstChar == '0') {
-      if (!_isAtEnd && _peek() == 'x') {
+      if (!_isAtEnd && (_peek() == 'x' || _peek() == 'X')) {
        _nextChar(); // consume the x
        // advance hexadecimal digits
-        while (isDigit(_peek()) && _isAtEnd) {
+        while (!_isAtEnd && isHexDigit(_peek())) {
          _nextChar();
+        }
        _addToken(TokenType.numberLiteral);
        return;
      }
    }
-    }

    void consumeDigits() {
      while (!_isAtEnd && isDigit(_peek())) {
@ -207,7 +215,7 @@ class Scanner {

    // ok, we've read the first part of the number. But there's more! If it's
    // not a hexadecimal number, it could be in scientific notation.
-    if (!_isAtEnd && _peek() == 'e' || _peek() == 'E') {
+    if (!_isAtEnd && (_peek() == 'e' || _peek() == 'E')) {
      _nextChar(); // consume e or E

      if (_isAtEnd) {
@ -232,6 +240,33 @@ class Scanner {
              .add(TokenizerError('Expected plus or minus', _currentLocation));
        }
      }
+    } else {
+      // ok, no scientific notation
+      _addToken(TokenType.numberLiteral);
+    }
+  }
+
+  /// Scans an identifier and adds an [IdentifierToken] for it to [tokens].
+  ///
+  /// With [escapedInQuotes], characters are consumed up to and including the
+  /// closing double quote. If the source ends before that quote is found, an
+  /// error is reported and no token is added. Without it, characters are
+  /// consumed for as long as [continuesColumnName] accepts them.
+  void _identifier({bool escapedInQuotes = false}) {
+    if (escapedInQuotes) {
+      // find the closing quote. The end-of-input check has to come first so
+      // that we never peek past the end of an unterminated identifier.
+      while (!_isAtEnd && _peek() != '"') {
+        _nextChar();
+      }
+      // Issue an error if the column name is unterminated
+      if (_isAtEnd) {
+        errors
+            .add(TokenizerError('Unterminated column name', _currentLocation));
+      } else {
+        // consume the closing double quote
+        _nextChar();
+        tokens.add(IdentifierToken(true, _currentSpan));
+      }
+    } else {
+      while (!_isAtEnd && continuesColumnName(_peek())) {
+        _nextChar();
+      }
+
+      tokens.add(IdentifierToken(false, _currentSpan));
     }
   }
 }
--- a/moor_generator/lib/src/sql/parser/tokenizer/token.dart
+++ b/moor_generator/lib/src/sql/parser/tokenizer/token.dart
@ -17,6 +17,7 @@ enum TokenType {

  stringLiteral,
  numberLiteral,
+  identifier,

  eof,
 }
@ -39,6 +40,15 @@ class StringLiteral extends Token {
      : super(TokenType.stringLiteral, span);
 }

+class IdentifierToken extends Token {
+  /// Whether this identifier was written in "double quotes". In sql, such
+  /// identifiers are always interpreted as a column name.
+  final bool escapedColumnName;
+
+  const IdentifierToken(this.escapedColumnName, SourceSpan span)
+      : super(TokenType.identifier, span);
+}
+
 class TokenizerError {
  final String message;
  final SourceLocation location;
--- a/moor_generator/lib/src/sql/parser/tokenizer/utils.dart
+++ b/moor_generator/lib/src/sql/parser/tokenizer/utils.dart
@ -4,6 +4,8 @@ const _charCodeLowerA = 97; // 'a'.codeUnitAt(0);
 const _charCodeLowerF = 102; // 'f'.codeUnitAt(0);
 const _charCodeA = 65; // 'A'.codeUnitAt(0);
 const _charCodeF = 70; // 'F'.codeUnitAt(0);
+const _charCodeZ = 90; // 'Z'.codeUnitAt(0);
+const _charCodeLowerZ = 122; // 'z'.codeUnitAt(0);

 bool isDigit(String char) {
  final code = char.codeUnitAt(0);
@ -14,5 +16,17 @@ bool isHexDigit(String char) {
  final code = char.codeUnitAt(0);

  return (_charCodeLowerA <= code && code <= _charCodeLowerF) ||
-      (_charCodeA <= code && code <= _charCodeF);
+      (_charCodeA <= code && code <= _charCodeF) ||
+      (_charCodeZero <= code && code <= _charCodeNine);
+}
+
+/// Whether [char] may be the first character of an unquoted identifier: an
+/// underscore or an ASCII letter.
+bool canStartColumnName(String char) {
+  final unit = char.codeUnitAt(0);
+  if (char == '_') return true;
+
+  final isLower = _charCodeLowerA <= unit && unit <= _charCodeLowerZ;
+  final isUpper = _charCodeA <= unit && unit <= _charCodeZ;
+  return isLower || isUpper;
+}
+
+/// Whether [char] may follow the first character of an unquoted identifier:
+/// anything accepted by [canStartColumnName], or a digit.
+bool continuesColumnName(String char) {
+  if (canStartColumnName(char)) return true;
+  return isDigit(char);
 }
--- a/moor_generator/test/sql/scanner/single_token_tests.dart
+++ b/moor_generator/test/sql/scanner/single_token_tests.dart
@ -15,15 +15,34 @@ void expectFullToken(String token, TokenType type) {

  if (tokens.length != 2 || tokens.last.type != TokenType.eof) {
    fail(
-        'Expected exactly one token when parsing $token, got ${tokens.length}');
+        'Expected exactly one token when parsing $token, got ${tokens.length - 1}');
  }

  expect(tokens.first.type, type, reason: '$token is a $type');
+  expect(tokens.first.span.text, token);
 }

 Map<String, TokenType> testCases = {
+  '(': TokenType.leftParen,
+  ')': TokenType.rightParen,
+  ',': TokenType.comma,
  '.': TokenType.dot,
+  '+': TokenType.plus,
+  '-': TokenType.minus,
+  '*': TokenType.star,
+  '/': TokenType.slash,
+  '<=': TokenType.lessEqual,
+  '<': TokenType.less,
+  '>=': TokenType.moreEqual,
+  '>': TokenType.more,
  "'hello there'": TokenType.stringLiteral,
+  '1.123': TokenType.numberLiteral,
+  '1.32e5': TokenType.numberLiteral,
+  '.123e-3': TokenType.numberLiteral,
+  '0xFF13': TokenType.numberLiteral,
+  '0Xf13A': TokenType.numberLiteral,
+  'SELECT': TokenType.identifier,
+  '"UPDATE"': TokenType.identifier,
 };

 void main() {