// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package asm

import (
	"fmt"
	"os"
	"strings"
	"unicode"
	"unicode/utf8"
)

// stateFn is a single state of the lexer's state machine. It is called
// with the lexer, consumes input from it and returns the state function
// to run next. Lex keeps calling the returned state until one of them
// returns nil.
type stateFn func(*lexer) stateFn

// token is emitted when the lexer has discovered
// a new parsable token. Tokens are delivered over
// the tokens channel of the lexer.
type token struct {
	typ    tokenType // kind of token that was recognised
	lineno int       // value of the lexer's line counter when the token was emitted
	text   string    // slice of the input covered by the token
}

// tokenType are the different types the lexer
// is able to parse and return.
type tokenType int const ( eof tokenType = iota // end of file lineStart // emitted when a line starts lineEnd // emitted when a line ends invalidStatement // any invalid statement element // any element during element parsing label // label is emitted when a label is found labelDef // label definition is emitted when a new label is found number // number is emitted when a number is found stringValue // stringValue is emitted when a string has been found Numbers = "1234567890" // characters representing any decimal number HexadecimalNumbers = Numbers + "aAbBcCdDeEfF" // characters representing any hexadecimal Alpha = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // characters representing alphanumeric ) // String implements stringer func (it tokenType) String() string { if int(it) > len(stringtokenTypes) { return "invalid" } return stringtokenTypes[it] } var stringtokenTypes = []string{ eof: "EOF", invalidStatement: "invalid statement", element: "element", lineEnd: "end of line", lineStart: "new line", label: "label", labelDef: "label definition", number: "number", stringValue: "string", } // lexer is the basic construct for parsing // source code and turning them in to tokens. // Tokens are interpreted by the compiler. type lexer struct { input string // input contains the source code of the program tokens chan token // tokens is used to deliver tokens to the listener state stateFn // the current state function lineno int // current line number in the source file start, pos, width int // positions for lexing and returning value debug bool // flag for triggering debug output } // lex lexes the program by name with the given source. It returns a // channel on which the tokens are delivered. 
func Lex(name string, source []byte, debug bool) <-chan token { ch := make(chan token) l := &lexer{ input: string(source), tokens: ch, state: lexLine, debug: debug, } go func() { l.emit(lineStart) for l.state != nil { l.state = l.state(l) } l.emit(eof) close(l.tokens) }() return ch } // next returns the next rune in the program's source. func (l *lexer) next() (rune rune) { if l.pos >= len(l.input) { l.width = 0 return 0 } rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) l.pos += l.width return rune } // backup backsup the last parsed element (multi-character) func (l *lexer) backup() { l.pos -= l.width } // peek returns the next rune but does not advance the seeker func (l *lexer) peek() rune { r := l.next() l.backup() return r } // ignore advances the seeker and ignores the value func (l *lexer) ignore() { l.start = l.pos } // Accepts checks whether the given input matches the next rune func (l *lexer) accept(valid string) bool { if strings.ContainsRune(valid, l.next()) { return true } l.backup() return false } // acceptRun will continue to advance the seeker until valid // can no longer be met. func (l *lexer) acceptRun(valid string) { for strings.ContainsRune(valid, l.next()) { } l.backup() } // acceptRunUntil is the inverse of acceptRun and will continue // to advance the seeker until the rune has been found. 
func (l *lexer) acceptRunUntil(until rune) bool { // Continues running until a rune is found for i := l.next(); !strings.ContainsRune(string(until), i); i = l.next() { if i == 0 { return false } } return true } // blob returns the current value func (l *lexer) blob() string { return l.input[l.start:l.pos] } // Emits a new token on to token channel for processing func (l *lexer) emit(t tokenType) { token := token{t, l.lineno, l.blob()} if l.debug { fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text) } l.tokens <- token l.start = l.pos } // lexLine is state function for lexing lines func lexLine(l *lexer) stateFn { for { switch r := l.next(); { case r == '\n': l.emit(lineEnd) l.ignore() l.lineno++ l.emit(lineStart) case r == ';' && l.peek() == ';': return lexComment case isSpace(r): l.ignore() case isLetter(r) || r == '_': return lexElement case isNumber(r): return lexNumber case r == '@': l.ignore() return lexLabel case r == '"': return lexInsideString default: return nil } } } // lexComment parses the current position until the end // of the line and discards the text. func lexComment(l *lexer) stateFn { l.acceptRunUntil('\n') l.ignore() return lexLine } // lexLabel parses the current label, emits and returns // the lex text state function to advance the parsing // process. func lexLabel(l *lexer) stateFn { l.acceptRun(Alpha + "_") l.emit(label) return lexLine } // lexInsideString lexes the inside of a string until // until the state function finds the closing quote. // It returns the lex text state function. 
func lexInsideString(l *lexer) stateFn { if l.acceptRunUntil('"') { l.emit(stringValue) } return lexLine } func lexNumber(l *lexer) stateFn { acceptance := Numbers if l.accept("0") || l.accept("xX") { acceptance = HexadecimalNumbers } l.acceptRun(acceptance) l.emit(number) return lexLine } func lexElement(l *lexer) stateFn { l.acceptRun(Alpha + "_" + Numbers) if l.peek() == ':' { l.emit(labelDef) l.accept(":") l.ignore() } else { l.emit(element) } return lexLine } func isLetter(t rune) bool { return unicode.IsLetter(t) } func isSpace(t rune) bool { return unicode.IsSpace(t) } func isNumber(t rune) bool { return unicode.IsNumber(t) }