From ff530dd4eac186fa511710d1fa19286bdf9c6cbb Mon Sep 17 00:00:00 2001 From: Simon Binder Date: Sat, 29 Jun 2019 22:29:16 +0200 Subject: [PATCH] Improve documentation of the sqlparser library --- .../lib/src/parser/sql/sql_parser.dart | 15 +++++++-- sqlparser/README.md | 12 +++++-- sqlparser/lib/sqlparser.dart | 2 +- sqlparser/lib/src/analysis/analysis.dart | 1 + sqlparser/lib/src/analysis/context.dart | 16 ++++++++++ sqlparser/lib/src/analysis/schema/column.dart | 10 ++++++ .../lib/src/analysis/schema/references.dart | 1 + sqlparser/lib/src/analysis/schema/table.dart | 10 +++++- .../src/analysis/steps/column_resolver.dart | 4 +++ .../analysis/steps/reference_resolver.dart | 1 + .../lib/src/analysis/steps/type_resolver.dart | 2 ++ sqlparser/lib/src/analysis/types/data.dart | 9 +++++- .../lib/src/analysis/types/resolver.dart | 20 ++++++++++-- sqlparser/lib/src/ast/ast.dart | 32 +++++++++++++++++-- sqlparser/lib/src/engine/sql_engine.dart | 28 +++++++++++++--- sqlparser/lib/src/reader/tokenizer/token.dart | 13 ++++++++ sqlparser/test/parser/select/from_test.dart | 1 + .../test/parser/select/group_by_test.dart | 1 + sqlparser/test/parser/select/limit_test.dart | 1 + .../test/parser/select/order_by_test.dart | 1 + 20 files changed, 165 insertions(+), 15 deletions(-) diff --git a/moor_generator/lib/src/parser/sql/sql_parser.dart b/moor_generator/lib/src/parser/sql/sql_parser.dart index b3ae2c64..bc8fae23 100644 --- a/moor_generator/lib/src/parser/sql/sql_parser.dart +++ b/moor_generator/lib/src/parser/sql/sql_parser.dart @@ -60,6 +60,10 @@ class SqlParser { } ColumnType _resolvedToMoor(ResolvedType type) { + if (type == null) { + return ColumnType.text; + } + switch (type.type) { case BasicType.nullType: return ColumnType.text; @@ -87,7 +91,14 @@ class SqlParser { final name = query.getField('name').toStringValue(); final sql = query.getField('query').toStringValue(); - final context = _engine.analyze(sql); + AnalysisContext context; + try { + context = 
_engine.analyze(sql); + } catch (e, s) { + errors.add(MoorError( + critical: true, + message: 'Error while trying to parse $sql: $e, $s')); + } for (var error in context.errors) { errors.add(MoorError( @@ -99,7 +110,7 @@ class SqlParser { if (root is SelectStatement) { _handleSelect(name, root, context); } else { - throw StateError('Unexpected sql'); + throw StateError('Unexpected sql, expected a select statement'); } } } diff --git a/sqlparser/README.md b/sqlparser/README.md index 5bfedf39..21c3942f 100644 --- a/sqlparser/README.md +++ b/sqlparser/README.md @@ -13,7 +13,7 @@ You can parse the abstract syntax tree of sqlite statements with `SqlEngine.pars import 'package:sqlparser/sqlparser.dart'; final engine = SqlEngine(); -final stmt = engine.parse(''' +final result = engine.parse(''' SELECT f.* FROM frameworks f INNER JOIN uses_language ul ON ul.framework = f.id INNER JOIN languages l ON l.id = ul.language @@ -21,6 +21,7 @@ WHERE l.name = 'Dart' ORDER BY f.name ASC, f.popularity DESC LIMIT 5 OFFSET 5 * 3 '''); +// result.rootNode contains the select statement in tree form ``` ### Analysis @@ -55,8 +56,15 @@ resolvedColumns.map((c) => c.name)); // id, content, id, content, 3 + 4 resolvedColumns.map((c) => context.typeOf(c).type.type) // int, text, int, text, int, int ``` +## Limitations +- For now, only `SELECT` and `DELETE` expressions are implemented, `UPDATE` and `INSERT` will follow + soon. +- Windowing is not supported yet +- Common table expressions and compound select statements `UNION` / `INTERSECT` are not supported + and probably won't be in the near future. + ## Thanks - To [Bob Nystrom](https://github.com/munificent) for his amazing ["Crafting Interpreters"](https://craftinginterpreters.com/) book, which was incredibly helpful when writing the parser. -- All authors of [SQLDelight](https://github.com/square/sqldelight). This library uses their algorithm +- To the authors of [SQLDelight](https://github.com/square/sqldelight). 
This library uses their algorithm for type inference. diff --git a/sqlparser/lib/sqlparser.dart b/sqlparser/lib/sqlparser.dart index 15f79dec..ea6e92ba 100644 --- a/sqlparser/lib/sqlparser.dart +++ b/sqlparser/lib/sqlparser.dart @@ -4,4 +4,4 @@ library sqlparser; export 'src/analysis/analysis.dart'; export 'src/ast/ast.dart'; export 'src/engine/sql_engine.dart'; -export 'src/reader/tokenizer/token.dart'; +export 'src/reader/tokenizer/token.dart' show CumulatedTokenizerException; diff --git a/sqlparser/lib/src/analysis/analysis.dart b/sqlparser/lib/src/analysis/analysis.dart index 5ad43abe..59757772 100644 --- a/sqlparser/lib/src/analysis/analysis.dart +++ b/sqlparser/lib/src/analysis/analysis.dart @@ -2,6 +2,7 @@ import 'dart:math'; import 'package:meta/meta.dart'; import 'package:sqlparser/sqlparser.dart'; +import 'package:sqlparser/src/reader/tokenizer/token.dart'; part 'schema/column.dart'; part 'schema/references.dart'; diff --git a/sqlparser/lib/src/analysis/context.dart b/sqlparser/lib/src/analysis/context.dart index 6ff7dcb1..379cd015 100644 --- a/sqlparser/lib/src/analysis/context.dart +++ b/sqlparser/lib/src/analysis/context.dart @@ -1,16 +1,32 @@ part of 'analysis.dart'; +/// Result of parsing and analyzing an sql statement. Contains the AST with +/// resolved references, information about result columns and errors that were +/// reported during analysis. class AnalysisContext { + /// All errors that occurred during analysis final List errors = []; + + /// The root node of the abstract syntax tree final AstNode root; + + /// The raw sql statement that was used to construct this [AnalysisContext]. final String sql; + + /// A resolver that can be used to obtain the type of a [Typeable]. This + /// mostly applies to [Expression]s, [Reference]s, [Variable]s and + /// [ResultSet.resolvedColumns] of a select statement. final TypeResolver types = TypeResolver(); + /// Constructs a new analysis context from the AST and the source sql. 
AnalysisContext(this.root, this.sql); + /// Reports an analysis error. void reportError(AnalysisError error) { errors.add(error); } + /// Obtains the result of any typeable component. See the information at + /// [types] on important [Typeable]s. ResolveResult typeOf(Typeable t) => types.resolveOrInfer(t); } diff --git a/sqlparser/lib/src/analysis/schema/column.dart b/sqlparser/lib/src/analysis/schema/column.dart index c8381ad6..e860ab6e 100644 --- a/sqlparser/lib/src/analysis/schema/column.dart +++ b/sqlparser/lib/src/analysis/schema/column.dart @@ -1,24 +1,34 @@ part of '../analysis.dart'; +/// A column that appears in a [ResultSet]. Has a type and a name. abstract class Column with Referencable implements Typeable { + /// The name of this column in the result set. String get name; const Column(); } +/// A column that is part of a table. class TableColumn extends Column { @override final String name; + + /// The type of this column, which is immediately available. final ResolvedType type; + /// The table this column belongs to. Table table; TableColumn(this.name, this.type); } +/// A column that is created by an expression. For instance, in the select +/// statement "SELECT 1 + 3", there is a column called "1 + 3" of type int. class ExpressionColumn extends Column { @override final String name; + + /// The expression returned by this column. final Expression expression; ExpressionColumn({@required this.name, this.expression}); diff --git a/sqlparser/lib/src/analysis/schema/references.dart b/sqlparser/lib/src/analysis/schema/references.dart index 82e4e22f..5f8318d8 100644 --- a/sqlparser/lib/src/analysis/schema/references.dart +++ b/sqlparser/lib/src/analysis/schema/references.dart @@ -44,6 +44,7 @@ class ReferenceScope { return ReferenceScope(this, root: effectiveRoot); } + /// Registers something that can be referenced in this and child scopes. 
void register(String identifier, Referencable ref) { _references.putIfAbsent(identifier.toUpperCase(), () => []).add(ref); } diff --git a/sqlparser/lib/src/analysis/schema/table.dart b/sqlparser/lib/src/analysis/schema/table.dart index c44789cc..ee53adb9 100644 --- a/sqlparser/lib/src/analysis/schema/table.dart +++ b/sqlparser/lib/src/analysis/schema/table.dart @@ -1,11 +1,14 @@ part of '../analysis.dart'; -/// Something that will resolve to a result set. +/// Something that will resolve to a [ResultSet] when referred to via +/// the [ReferenceScope]. abstract class ResolvesToResultSet with Referencable { ResultSet get resultSet; } +/// Something that returns a set of columns when evaluated. abstract class ResultSet implements ResolvesToResultSet { + /// The columns that will be returned when evaluating this query. List get resolvedColumns; @override @@ -17,12 +20,17 @@ abstract class ResultSet implements ResolvesToResultSet { } } +/// A database table. The information stored here will be used to resolve +/// references and for type inference. class Table with ResultSet, VisibleToChildren { + /// The name of this table, as it appears in sql statements. This should be + /// the raw name, not an escaped version. final String name; @override final List resolvedColumns; + /// Constructs a table from the known [name] and [resolvedColumns]. Table({@required this.name, this.resolvedColumns}) { for (var column in resolvedColumns) { column.table = this; diff --git a/sqlparser/lib/src/analysis/steps/column_resolver.dart b/sqlparser/lib/src/analysis/steps/column_resolver.dart index a27ae0c9..db135203 100644 --- a/sqlparser/lib/src/analysis/steps/column_resolver.dart +++ b/sqlparser/lib/src/analysis/steps/column_resolver.dart @@ -1,5 +1,9 @@ part of '../analysis.dart'; +/// Walks the AST and, for each select statement it sees, finds out which +/// columns are returned and which columns are available. 
For instance, when +/// we have a table "t" with two columns "a" and "b", the select statement +/// "SELECT a FROM t" has one result column but two columns available. class ColumnResolver extends RecursiveVisitor { final AnalysisContext context; diff --git a/sqlparser/lib/src/analysis/steps/reference_resolver.dart b/sqlparser/lib/src/analysis/steps/reference_resolver.dart index 29a24372..d4741039 100644 --- a/sqlparser/lib/src/analysis/steps/reference_resolver.dart +++ b/sqlparser/lib/src/analysis/steps/reference_resolver.dart @@ -1,5 +1,6 @@ part of '../analysis.dart'; +/// Resolves any open [Reference] it finds in the AST. class ReferenceResolver extends RecursiveVisitor { final AnalysisContext context; diff --git a/sqlparser/lib/src/analysis/steps/type_resolver.dart b/sqlparser/lib/src/analysis/steps/type_resolver.dart index 85fad9bd..154040d9 100644 --- a/sqlparser/lib/src/analysis/steps/type_resolver.dart +++ b/sqlparser/lib/src/analysis/steps/type_resolver.dart @@ -1,5 +1,7 @@ part of '../analysis.dart'; +/// Resolves the type of columns in a select statement and the type of +/// expressions appearing in a select statement. class TypeResolvingVisitor extends RecursiveVisitor { final AnalysisContext context; TypeResolver get types => context.types; diff --git a/sqlparser/lib/src/analysis/types/data.dart b/sqlparser/lib/src/analysis/types/data.dart index 70f1093c..7a3f84ee 100644 --- a/sqlparser/lib/src/analysis/types/data.dart +++ b/sqlparser/lib/src/analysis/types/data.dart @@ -11,6 +11,10 @@ enum BasicType { class ResolvedType { final BasicType type; + + /// We set hints for additional information that might be useful for + /// applications but aren't covered by just exposing a [BasicType]. See the + /// comment on [TypeHint] for examples. final TypeHint hint; final bool nullable; @@ -39,15 +43,18 @@ class ResolvedType { /// Provides more precise hints than the [BasicType]. 
For instance, booleans are /// stored as ints in sqlite, but it might be desirable to know whether an -/// expression will actually be a boolean. +/// expression will actually be a boolean, so we could set the +/// [ResolvedType.hint] to [IsBoolean]. abstract class TypeHint { const TypeHint(); } +/// Type hint to mark that this type will contain a boolean value. class IsBoolean extends TypeHint { const IsBoolean(); } +/// Type hint to mark that this type will contain a date time value. class IsDateTime extends TypeHint { const IsDateTime(); } diff --git a/sqlparser/lib/src/analysis/types/resolver.dart b/sqlparser/lib/src/analysis/types/resolver.dart index c072778d..af24ba7c 100644 --- a/sqlparser/lib/src/analysis/types/resolver.dart +++ b/sqlparser/lib/src/analysis/types/resolver.dart @@ -1,6 +1,6 @@ part of '../analysis.dart'; -const comparisonOperators = [ +const _comparisonOperators = [ TokenType.equal, TokenType.doubleEqual, TokenType.exclamationEqual, @@ -82,7 +82,7 @@ class TypeResolver { return const ResolveResult(ResolvedType.bool()); } else if (expr is BinaryExpression) { final operator = expr.operator.type; - if (comparisonOperators.contains(operator)) { + if (_comparisonOperators.contains(operator)) { return const ResolveResult(ResolvedType.bool()); } else { final type = _encapsulate(expr.childNodes.cast(), @@ -277,9 +277,22 @@ class TypeResolver { } } +/// Result of resolving a type. This can either have the resolved [type] set, +/// or it can inform the caller that it [needsContext] to resolve the type +/// properly. Failure to resolve the type will have the [unknown] flag set. +/// +/// When you see a [ResolveResult] that is unknown or needs context in the +/// final AST returned by [SqlEngine.analyze], assume that the type cannot be +/// determined. class ResolveResult { + /// The resolved type. final ResolvedType type; + + /// Whether more context is needed to resolve the type. Used internally by the + /// analyzer. 
final bool needsContext; + + /// Whether type resolution failed. final bool unknown; const ResolveResult(this.type) @@ -296,6 +309,9 @@ class ResolveResult { bool get nullable => type?.nullable ?? true; + /// Copies the result with the [nullable] information, if there is one. If + /// there isn't, the failure state will be copied into the new + /// [ResolveResult]. ResolveResult withNullable(bool nullable) { if (type != null) { return ResolveResult(type.withNullable(nullable)); diff --git a/sqlparser/lib/src/ast/ast.dart b/sqlparser/lib/src/ast/ast.dart index def6dfe7..140f9f4f 100644 --- a/sqlparser/lib/src/ast/ast.dart +++ b/sqlparser/lib/src/ast/ast.dart @@ -21,25 +21,35 @@ part 'expressions/variables.dart'; part 'statements/select.dart'; part 'statements/statement.dart'; +/// A node in the abstract syntax tree of an SQL statement. abstract class AstNode { /// The parent of this node, or null if this is the root node. Will be set /// by the analyzer after the tree has been parsed. AstNode parent; + /// The first token that appears in this node. This information is not set for + /// all nodes. Token first; + + /// The last token that appears in this node. This information is not set for + /// all nodes. Token last; - /// The first index in the source that belongs to this node + /// The first index in the source that belongs to this node. Not set for all + /// nodes. int get firstPosition => first.span.start.offset; - /// The last position that belongs to node, exclusive + /// The last position that belongs to this node, exclusive. Not set for all nodes. int get lastPosition => last.span.end.offset; + /// Sets the [AstNode.first] and [AstNode.last] properties in one go. void setSpan(Token first, Token last) { this.first = first; this.last = last; } + /// Returns all parents of this node up to the root. If this node is the root, + /// the iterable will be empty. 
Iterable get parents sync* { var node = parent; while (node != null) { @@ -48,6 +58,8 @@ abstract class AstNode { } } + /// Recursively returns all descendants of this node, e.g. its children, their + /// children and so on. The tree will be pre-order traversed. Iterable get allDescendants sync* { for (var child in childNodes) { yield child; @@ -56,10 +68,20 @@ abstract class AstNode { } final Map _metadata = {}; + + /// Returns the metadata of type [T] that might have been set on this node, or + /// null if none was found. + /// Nodes can have arbitrary annotations on them set via [setMeta] and + /// obtained via [meta]. This mechanism is used to, for instance, attach + /// variable scopes to a subtree. T meta() { return _metadata[T] as T; } + /// Sets the metadata of type [T] to the specified [value]. + /// Nodes can have arbitrary annotations on them set via [setMeta] and + /// obtained via [meta]. This mechanism is used to, for instance, attach + /// variable scopes to a subtree. void setMeta(T value) { _metadata[T] = value; } @@ -78,11 +100,17 @@ abstract class AstNode { throw StateError('No reference scope found in this or any parent node'); } + /// Applies a [ReferenceScope] to this node. Variables declared in [scope] + /// will be visible to this node and to [allDescendants]. set scope(ReferenceScope scope) { setMeta(scope); } + /// All direct children of this node. Iterable get childNodes; + + /// Calls the appropriate method on the [visitor] to make it recognize this + /// node. 
T accept(AstVisitor visitor); /// Whether the content of this node is equal to the [other] node of the same diff --git a/sqlparser/lib/src/engine/sql_engine.dart b/sqlparser/lib/src/engine/sql_engine.dart index 2adeebe5..2e027b57 100644 --- a/sqlparser/lib/src/engine/sql_engine.dart +++ b/sqlparser/lib/src/engine/sql_engine.dart @@ -2,6 +2,7 @@ import 'package:sqlparser/src/analysis/analysis.dart'; import 'package:sqlparser/src/ast/ast.dart'; import 'package:sqlparser/src/reader/parser/parser.dart'; import 'package:sqlparser/src/reader/tokenizer/scanner.dart'; +import 'package:sqlparser/src/reader/tokenizer/token.dart'; class SqlEngine { /// All tables registered with [registerTable]. @@ -26,13 +27,17 @@ class SqlEngine { /// Parses the [sql] statement. At the moment, only SELECT statements are /// supported. - AstNode parse(String sql) { + ParseResult parse(String sql) { final scanner = Scanner(sql); final tokens = scanner.scanTokens(); - // todo error handling from scanner + + if (scanner.errors.isNotEmpty) { + throw CumulatedTokenizerException(scanner.errors); + } final parser = Parser(tokens); - return parser.statement(); + final stmt = parser.statement(); + return ParseResult._(stmt, parser.errors); } /// Parses and analyzes the [sql] statement, which at the moment has to be a @@ -43,7 +48,8 @@ class SqlEngine { /// and result columns, so all known tables should be registered using /// [registerTable] before calling this method. AnalysisContext analyze(String sql) { - final node = parse(sql); + final result = parse(sql); + final node = result.rootNode; const SetParentVisitor().startAtRoot(node); final context = AnalysisContext(node, sql); @@ -58,3 +64,17 @@ class SqlEngine { return context; } } + +/// The result of parsing an sql query. Contains the root of the AST and all +/// errors that might have occurred during parsing. +class ParseResult { + /// The topmost node in the sql AST that was parsed. 
+ final AstNode rootNode; + + /// A list of all errors that occurred during parsing. [ParsingError.toString] + /// returns a helpful description of what went wrong, along with the position + /// where the error occurred. + final List errors; + + ParseResult._(this.rootNode, this.errors); +} diff --git a/sqlparser/lib/src/reader/tokenizer/token.dart b/sqlparser/lib/src/reader/tokenizer/token.dart index cdb6e290..dafc459c 100644 --- a/sqlparser/lib/src/reader/tokenizer/token.dart +++ b/sqlparser/lib/src/reader/tokenizer/token.dart @@ -174,3 +174,16 @@ class TokenizerError { TokenizerError(this.message, this.location); } + +/// Thrown by the sql engine when a sql statement can't be tokenized. +class CumulatedTokenizerException implements Exception { + final List errors; + CumulatedTokenizerException(this.errors); + + @override + String toString() { + final explanation = + errors.map((e) => '${e.message} at ${e.location}').join(', '); + return 'Malformed sql: $explanation'; + } +} diff --git a/sqlparser/test/parser/select/from_test.dart b/sqlparser/test/parser/select/from_test.dart index 9cb9aeb5..2cf6c0a6 100644 --- a/sqlparser/test/parser/select/from_test.dart +++ b/sqlparser/test/parser/select/from_test.dart @@ -1,3 +1,4 @@ +import 'package:sqlparser/src/reader/tokenizer/token.dart'; import 'package:test/test.dart'; import 'package:sqlparser/sqlparser.dart'; import 'package:sqlparser/src/utils/ast_equality.dart'; diff --git a/sqlparser/test/parser/select/group_by_test.dart b/sqlparser/test/parser/select/group_by_test.dart index 6150e40f..fcdeed3e 100644 --- a/sqlparser/test/parser/select/group_by_test.dart +++ b/sqlparser/test/parser/select/group_by_test.dart @@ -1,3 +1,4 @@ +import 'package:sqlparser/src/reader/tokenizer/token.dart'; import 'package:test/test.dart'; import 'package:sqlparser/sqlparser.dart'; import 'package:sqlparser/src/utils/ast_equality.dart'; diff --git a/sqlparser/test/parser/select/limit_test.dart 
b/sqlparser/test/parser/select/limit_test.dart index 8bff216f..99426b90 100644 --- a/sqlparser/test/parser/select/limit_test.dart +++ b/sqlparser/test/parser/select/limit_test.dart @@ -1,3 +1,4 @@ +import 'package:sqlparser/src/reader/tokenizer/token.dart'; import 'package:test/test.dart'; import 'package:sqlparser/sqlparser.dart'; import 'package:sqlparser/src/utils/ast_equality.dart'; diff --git a/sqlparser/test/parser/select/order_by_test.dart b/sqlparser/test/parser/select/order_by_test.dart index 80a91541..e0830858 100644 --- a/sqlparser/test/parser/select/order_by_test.dart +++ b/sqlparser/test/parser/select/order_by_test.dart @@ -1,3 +1,4 @@ +import 'package:sqlparser/src/reader/tokenizer/token.dart'; import 'package:test/test.dart'; import 'package:sqlparser/sqlparser.dart'; import 'package:sqlparser/src/utils/ast_equality.dart';