Beware of the mightily inefficient four-pass parser

(computers are fast these days ok??)
This commit is contained in:
Simon Binder 2019-06-26 23:07:30 +02:00
parent 674f4c6a9c
commit 6238e459d1
No known key found for this signature in database
GPG Key ID: 7891917E4147B8C0
9 changed files with 146 additions and 61 deletions

View File

@ -1,7 +1,6 @@
# sqlparser
An sql parser and static analyzer, written in pure Dart. Currently in development and
not really suitable for any use.
An sql parser and static analyzer, written in pure Dart. Currently in development.
## Using this library
@ -20,3 +19,18 @@ LIMIT 5 OFFSET 5 * 3
// ???
profit();
```
## Features
Not all features are available yet, but parsing select statements (even complex ones!) and
performing analysis on them works!
### AST Parsing
Can parse the abstract syntax tree of any sqlite statement with `SqlEngine.parse`.
### Static analysis
Given information about all tables and a sql statement, this library can:
1. Determine which result columns a query is going to have
2. Determine the static type of variables included in the query
3. Issue some basic warnings on queries that are syntactically valid but won't run

View File

@ -8,6 +8,7 @@ part 'schema/column.dart';
part 'schema/references.dart';
part 'schema/table.dart';
part 'steps/column_resolver.dart';
part 'steps/reference_finder.dart';
part 'steps/reference_resolver.dart';
part 'steps/set_parent_visitor.dart';

View File

@ -1,7 +1,20 @@
part of '../analysis.dart';
class Column with Referencable, Typeable {
/// Common base type for columns.
///
/// Mixes in [Referencable] and [Typeable]; concrete subclasses supply the
/// [name].
abstract class Column with Referencable, Typeable {
/// The name of this column.
String get name;
}
class TableColumn extends Column {
@override
final String name;
Column(this.name);
TableColumn(this.name);
}
/// A named [Column] that keeps a reference to an [Expression].
class ExpressionColumn extends Column {
  /// Creates a column called [name] that is associated with [expression].
  ///
  /// Only [name] is marked as required.
  ExpressionColumn({@required this.name, this.expression});

  /// The name of this column.
  @override
  final String name;

  /// The expression associated with this column.
  final Expression expression;
}

View File

@ -21,7 +21,7 @@ class Table with ResultSet, VisibleToChildren {
final String name;
@override
final List<Column> resolvedColumns;
final List<TableColumn> resolvedColumns;
Table({@required this.name, this.resolvedColumns});
}

View File

@ -0,0 +1,93 @@
part of '../analysis.dart';
/// Resolves which columns each visited select statement returns.
///
/// For a [SelectStatement], the columns made available by its `FROM` clause
/// are collected first. The statement's result columns are then turned into a
/// list of [Column]s that is stored in [SelectStatement.resolvedColumns].
/// Table references found in the `FROM` clause get their `resolved` field set
/// along the way. Unknown tables are reported to [context].
class ColumnResolver extends RecursiveVisitor<void> {
  /// Receives the errors reported while resolving.
  final AnalysisContext context;

  ColumnResolver(this.context);

  @override
  void visitSelectStatement(SelectStatement e) {
    _resolveSelect(e, []);
  }

  /// Adds the columns contributed by [queryable] to [availableColumns].
  ///
  /// Tables contribute the columns of their resolved table, nested selects
  /// contribute their own resolved columns, and joins contribute whatever each
  /// joined query contributes.
  void _handle(Queryable queryable, List<Column> availableColumns) {
    queryable.when(
      isTable: (table) {
        _resolveTableReference(table);
        // The table may not have been found (an error was reported in that
        // case), so there might be no result set to read columns from.
        _addAllIfPresent(availableColumns, table.resultSet?.resolvedColumns);
      },
      isSelect: (select) {
        // the inner select statement doesn't have access to columns defined in
        // the outer statements.
        _resolveSelect(select.statement, []);
        _addAllIfPresent(availableColumns, select.statement.resolvedColumns);
      },
      isJoin: (join) {
        for (var query in join.joins.map((j) => j.query)) {
          _handle(query, availableColumns);
        }
      },
    );
  }

  /// Computes the columns returned by [s] and stores them on the statement.
  ///
  /// [availableColumns] is the list that the `FROM` clause of [s] adds its
  /// columns to. It is mutated by this method, so callers pass a fresh list.
  void _resolveSelect(SelectStatement s, List<Column> availableColumns) {
    // The list passed in is used directly. Re-declaring a local with the same
    // name here would hide the parameter and silently drop the argument.
    for (var queryable in s.from) {
      _handle(queryable, availableColumns);
    }

    final usedColumns = <Column>[];
    final scope = s.scope;

    // a select statement can include everything from its sub queries as a
    // result, but also expressions that appear as result columns
    for (var resultColumn in s.columns) {
      if (resultColumn is StarResultColumn) {
        if (resultColumn.tableName != null) {
          final tableResolver = scope
              .resolve<ResolvesToResultSet>(resultColumn.tableName, orElse: () {
            context.reportError(AnalysisError(
              type: AnalysisErrorType.referencedUnknownTable,
              message: 'Unknown table: ${resultColumn.tableName}',
              relevantNode: resultColumn,
            ));
          });

          // The orElse callback only reports the error and yields no value,
          // so the lookup is treated as possibly null instead of being
          // dereferenced unconditionally.
          _addAllIfPresent(
              usedColumns, tableResolver?.resultSet?.resolvedColumns);
        } else {
          // we have a * column, that would be all available columns
          usedColumns.addAll(availableColumns);
        }
      } else if (resultColumn is ExpressionResultColumn) {
        final name = _nameOfResultColumn(resultColumn);
        usedColumns.add(
          ExpressionColumn(name: name, expression: resultColumn.expression),
        );
      }
    }

    s.resolvedColumns = usedColumns;
  }

  /// Returns the name under which [c] appears in the result.
  ///
  /// This is the explicit alias if there is one, otherwise the column name of
  /// a plain reference. Other expressions currently get a placeholder name.
  String _nameOfResultColumn(ExpressionResultColumn c) {
    if (c.as != null) return c.as;

    final expression = c.expression;
    if (expression is Reference) {
      return expression.columnName;
    }

    // todo I think in this case it's just the literal lexeme?
    return 'TODO';
  }

  /// Looks up the table named by [r] in its scope and stores it in
  /// `r.resolved`, reporting an error to [context] if it can't be found.
  void _resolveTableReference(TableReference r) {
    final scope = r.scope;
    final resolvedTable = scope.resolve<Table>(r.tableName, orElse: () {
      context.reportError(AnalysisError(
        type: AnalysisErrorType.referencedUnknownTable,
        relevantNode: r,
        message: 'The table ${r.tableName} could not be found',
      ));
    });
    r.resolved = resolvedTable;
  }

  /// Appends [columns] to [target], doing nothing when [columns] is null.
  static void _addAllIfPresent(List<Column> target, Iterable<Column> columns) {
    if (columns != null) {
      target.addAll(columns);
    }
  }
}

View File

@ -7,12 +7,11 @@ class ReferenceResolver extends RecursiveVisitor<void> {
@override
void visitFunction(FunctionExpression e) {
final scope = e.scope;
e.resolved = scope.resolve<SqlFunction>(e.name, orElse: () {
e.resolved = e.scope.resolve<SqlFunction>(e.name, orElse: () {
context.reportError(AnalysisError(
type: AnalysisErrorType.unknownFunction,
relevantNode: e,
message: 'Unknown function: ${e.name}',
relevantNode: e,
));
});
visitChildren(e);
@ -32,7 +31,7 @@ class ReferenceResolver extends RecursiveVisitor<void> {
relevantNode: e,
));
});
final resultSet = _resolve(tableResolver, scope);
final resultSet = tableResolver.resultSet;
if (resultSet == null) {
context.reportError(AnalysisError(
@ -55,7 +54,7 @@ class ReferenceResolver extends RecursiveVisitor<void> {
// todo special case for USING (...) in joins?
final tables = scope.allOf<ResolvesToResultSet>();
final columns = tables
.map((t) => _resolve(t, scope)?.findColumn(e.columnName))
.map((t) => t.resultSet.findColumn(e.columnName))
.where((c) => c != null)
.toSet();
@ -74,43 +73,4 @@ class ReferenceResolver extends RecursiveVisitor<void> {
visitChildren(e);
}
ResultSet _resolve(ResolvesToResultSet resolver, ReferenceScope scope,
{Function orElse()}) {
// already resolved? don't do the same work twice!
if (resolver.resultSet != null) {
return resolver.resultSet;
}
if (resolver is ResultSet) {
return resolver;
} else if (resolver is TableReference) {
final table = resolver;
final resolvedTable = scope.resolve<Table>(table.tableName, orElse: () {
context.reportError(AnalysisError(
type: AnalysisErrorType.referencedUnknownTable,
relevantNode: table,
message: 'The table ${table.tableName} could not be found',
));
});
table.resolved = resolvedTable;
return resolvedTable;
}
throw ArgumentError('Resolving not yet implemented for $resolver');
}
@override
void visitQueryable(Queryable e) {
final scope = e.scope;
e.when(
isTable: (table) {
_resolve(table, scope);
},
isSelect: (select) {},
isJoin: (join) {},
);
visitChildren(e);
}
}

View File

@ -11,6 +11,11 @@ class SelectStatement extends AstNode with ResultSet {
final OrderBy orderBy;
final Limit limit;
/// The resolved list of columns returned by this select statements. Not
/// available from the parse tree, will be set later by the analyzer.
@override
List<Column> resolvedColumns;
SelectStatement(
{this.distinct = false,
this.columns,
@ -40,12 +45,6 @@ class SelectStatement extends AstNode with ResultSet {
bool contentEquals(SelectStatement other) {
return other.distinct == distinct;
}
@override
List<Column> get resolvedColumns {
throw UnimplementedError(
'todo: implement column resolution for select statement');
}
}
abstract class ResultColumn extends AstNode {

View File

@ -48,7 +48,7 @@ class SqlEngine {
final scope = _constructRootScope();
ReferenceFinder(globalScope: scope).start(node);
node.accept(ReferenceResolver(context));
node..accept(ColumnResolver(context))..accept(ReferenceResolver(context));
return context;
}

View File

@ -11,9 +11,9 @@ void main() {
expect((column.expression as FunctionExpression).resolved, abs);
});
test('resolves table names and aliases', () {
final id = Column('id');
final content = Column('content');
test('correctly resolves return columns', () {
final id = TableColumn('id');
final content = TableColumn('content');
final demoTable = Table(
name: 'demo',
@ -21,9 +21,14 @@ void main() {
);
final engine = SqlEngine()..registerTable(demoTable);
final context = engine.analyze('SELECT id, d.content FROM demo AS d');
final context = engine.analyze('SELECT id, d.content, * FROM demo AS d');
final select = context.root as SelectStatement;
final resolvedColumns = select.resolvedColumns;
expect(
resolvedColumns.map((c) => c.name), ['id', 'content', 'id', 'content']);
final firstColumn = select.columns[0] as ExpressionResultColumn;
final secondColumn = select.columns[1] as ExpressionResultColumn;
final from = select.from[0] as TableReference;