summaryrefslogtreecommitdiff
path: root/vendor/modernc.org/gc/v3/scanner.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/modernc.org/gc/v3/scanner.go')
-rw-r--r--vendor/modernc.org/gc/v3/scanner.go1446
1 files changed, 0 insertions, 1446 deletions
diff --git a/vendor/modernc.org/gc/v3/scanner.go b/vendor/modernc.org/gc/v3/scanner.go
deleted file mode 100644
index a3815a8f8..000000000
--- a/vendor/modernc.org/gc/v3/scanner.go
+++ /dev/null
@@ -1,1446 +0,0 @@
-// Copyright 2022 The Gc Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package gc // import "modernc.org/gc/v3"
-
-import (
- "bytes"
- "fmt"
- "go/token"
- "path/filepath"
- "strings"
- "unicode"
- "unicode/utf8"
-
- "modernc.org/mathutil"
- mtoken "modernc.org/token"
-)
-
-var (
- _ Node = (*Token)(nil)
- _ Node = (*nonode)(nil)
-
- keywords = map[string]token.Token{
- "break": BREAK,
- "case": CASE,
- "chan": CHAN,
- "const": CONST,
- "continue": CONTINUE,
- "default": DEFAULT,
- "defer": DEFER,
- "else": ELSE,
- "fallthrough": FALLTHROUGH,
- "for": FOR,
- "func": FUNC,
- "go": GO,
- "goto": GOTO,
- "if": IF,
- "import": IMPORT,
- "interface": INTERFACE,
- "map": MAP,
- "package": PACKAGE,
- "range": RANGE,
- "return": RETURN,
- "select": SELECT,
- "struct": STRUCT,
- "switch": SWITCH,
- "type": TYPE,
- "var": VAR,
- }
-
- lineCommentTag = []byte("line ")
- znode = &nonode{}
-)
-
-type nonode struct{}
-
-func (*nonode) Position() (r token.Position) { return r }
-func (*nonode) Source(full bool) string { return "" }
-
-// Token represents a lexeme, its position and its semantic value.
-type Token struct { // 16 bytes on 64 bit arch
- source *source
-
- ch int32
- index int32
-}
-
-// Ch returns which token t represents
-func (t Token) Ch() token.Token { return token.Token(t.ch) }
-
-// Source implements Node.
-func (t Token) Source(full bool) string {
- // trc("%10s %v: #%v sep %v, src %v, buf %v", tokSource(t.Ch()), t.Position(), t.index, t.source.toks[t.index].sep, t.source.toks[t.index].src, len(t.source.buf))
- sep := t.Sep()
- if !full && sep != "" {
- sep = " "
- }
- src := t.Src()
- if !full && strings.ContainsRune(src, '\n') {
- src = " "
- }
- // trc("%q %q -> %q %q", t.Sep(), t.Src(), sep, src)
- return sep + src
-}
-
-// Positions implements Node.
-func (t Token) Position() (r token.Position) {
- if t.source == nil {
- return r
- }
-
- s := t.source
- off := mathutil.MinInt32(int32(len(s.buf)), s.toks[t.index].src)
- return token.Position(s.file.PositionFor(mtoken.Pos(s.base+off), true))
-}
-
-// Prev returns the token preceding t or a zero value if no such token exists.
-func (t Token) Prev() (r Token) {
- if index := t.index - 1; index >= 0 {
- s := t.source
- return Token{source: s, ch: s.toks[index].ch, index: index}
- }
-
- return r
-}
-
-// Next returns the token following t or a zero value if no such token exists.
-func (t Token) Next() (r Token) {
- if index := t.index + 1; index < int32(len(t.source.toks)) {
- s := t.source
- return Token{source: s, ch: s.toks[index].ch, index: index}
- }
-
- return r
-}
-
-// Sep returns any separators, combined, preceding t.
-func (t Token) Sep() string {
- s := t.source
- if p, ok := s.sepPatches[t.index]; ok {
- return p
- }
-
- return string(s.buf[s.toks[t.index].sep:s.toks[t.index].src])
-}
-
-// SetSep sets t's separator.
-func (t Token) SetSep(s string) {
- src := t.source
- if src.sepPatches == nil {
- src.sepPatches = map[int32]string{}
- }
- src.sepPatches[t.index] = s
-}
-
-// Src returns t's source form.
-func (t Token) Src() string {
- s := t.source
- if p, ok := s.srcPatches[t.index]; ok {
- return p
- }
-
- if t.ch != int32(EOF) {
- next := t.source.off
- if t.index < int32(len(s.toks))-1 {
- next = s.toks[t.index+1].sep
- }
- return string(s.buf[s.toks[t.index].src:next])
- }
-
- return ""
-}
-
-// SetSrc sets t's source form.
-func (t Token) SetSrc(s string) {
- src := t.source
- if src.srcPatches == nil {
- src.srcPatches = map[int32]string{}
- }
- src.srcPatches[t.index] = s
-}
-
-// IsValid reports t is a valid token. Zero value reports false.
-func (t Token) IsValid() bool { return t.source != nil }
-
-type tok struct { // 12 bytes
- ch int32
- sep int32
- src int32
-}
-
-func (t *tok) token() token.Token { return token.Token(t.ch) }
-
-func (t *tok) position(s *source) (r token.Position) {
- off := mathutil.MinInt32(int32(len(s.buf)), t.src)
- return token.Position(s.file.PositionFor(mtoken.Pos(s.base+off), true))
-}
-
-// source represents a single Go source file, editor text buffer etc.
-type source struct {
- buf []byte
- file *mtoken.File
- name string
- sepPatches map[int32]string
- srcPatches map[int32]string
- toks []tok
-
- base int32
- off int32
-}
-
-// 'buf' becomes owned by the result and must not be modified afterwards.
-func newSource(name string, buf []byte) *source {
- file := mtoken.NewFile(name, len(buf))
- return &source{
- buf: buf,
- file: file,
- name: name,
- base: int32(file.Base()),
- }
-}
-
-type ErrWithPosition struct {
- pos token.Position
- err error
-}
-
-func (e ErrWithPosition) String() string {
- switch {
- case e.pos.IsValid():
- return fmt.Sprintf("%v: %v", e.pos, e.err)
- default:
- return fmt.Sprintf("%v", e.err)
- }
-}
-
-type errList []ErrWithPosition
-
-func (e errList) Err() (r error) {
- if len(e) == 0 {
- return nil
- }
-
- return e
-}
-
-func (e errList) Error() string {
- w := 0
- prev := ErrWithPosition{pos: token.Position{Offset: -1}}
- for _, v := range e {
- if v.pos.Line == 0 || v.pos.Offset != prev.pos.Offset || v.err.Error() != prev.err.Error() {
- e[w] = v
- w++
- prev = v
- }
- }
-
- var a []string
- for _, v := range e {
- a = append(a, fmt.Sprint(v))
- }
- return strings.Join(a, "\n")
-}
-
-func (e *errList) err(pos token.Position, msg string, args ...interface{}) {
- if trcErrors {
- trc("FAIL "+msg, args...)
- }
- switch {
- case len(args) == 0:
- *e = append(*e, ErrWithPosition{pos, fmt.Errorf("%s", msg)})
- default:
- *e = append(*e, ErrWithPosition{pos, fmt.Errorf(msg, args...)})
- }
-}
-
-type scanner struct {
- *source
- dir string
- errs errList
- tok tok
-
- last int32
-
- errBudget int
-
- c byte // Lookahead byte.
-
- eof bool
- isClosed bool
-}
-
-func newScanner(name string, buf []byte) *scanner {
- dir, _ := filepath.Split(name)
- r := &scanner{source: newSource(name, buf), errBudget: 10, dir: dir}
- switch {
- case len(buf) == 0:
- r.eof = true
- default:
- r.c = buf[0]
- if r.c == '\n' {
- r.file.AddLine(int(r.base + r.off))
- }
- }
- return r
-}
-
-func isDigit(c byte) bool { return c >= '0' && c <= '9' }
-func isHexDigit(c byte) bool { return isDigit(c) || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F' }
-func isIDNext(c byte) bool { return isIDFirst(c) || isDigit(c) }
-func isOctalDigit(c byte) bool { return c >= '0' && c <= '7' }
-
-func isIDFirst(c byte) bool {
- return c >= 'a' && c <= 'z' ||
- c >= 'A' && c <= 'Z' ||
- c == '_'
-}
-
-func (s *scanner) position() token.Position {
- return token.Position(s.source.file.PositionFor(mtoken.Pos(s.base+s.off), true))
-}
-
-func (s *scanner) pos(off int32) token.Position {
- return token.Position(s.file.PositionFor(mtoken.Pos(s.base+off), true))
-}
-
-func (s *scanner) token() Token {
- return Token{source: s.source, ch: s.tok.ch, index: int32(len(s.toks) - 1)}
-}
-
-func (s *scanner) err(off int32, msg string, args ...interface{}) {
- if s.errBudget <= 0 {
- s.close()
- return
- }
-
- s.errBudget--
- if n := int32(len(s.buf)); off >= n {
- off = n
- }
- s.errs.err(s.pos(off), msg, args...)
-}
-
-func (s *scanner) close() {
- if s.isClosed {
- return
- }
-
- s.tok.ch = int32(ILLEGAL)
- s.eof = true
- s.isClosed = true
-}
-
-func (s *scanner) next() {
- if s.eof {
- return
- }
-
- s.off++
- if int(s.off) == len(s.buf) {
- s.c = 0
- s.eof = true
- return
- }
-
- s.c = s.buf[s.off]
- if s.c == '\n' {
- s.file.AddLine(int(s.base + s.off))
- }
-}
-
-func (s *scanner) nextN(n int) {
- if int(s.off) == len(s.buf)-n {
- s.c = 0
- s.eof = true
- return
- }
-
- s.off += int32(n)
- s.c = s.buf[s.off]
- if s.c == '\n' {
- s.file.AddLine(int(s.base + s.off))
- }
-}
-
-func (s *scanner) scan() (r bool) {
- if s.isClosed {
- return false
- }
-
- s.last = s.tok.ch
- s.tok.sep = s.off
- s.tok.ch = -1
- for {
- if r = s.scan0(); !r || s.tok.ch >= 0 {
- s.toks = append(s.toks, s.tok)
- // trc("", dump(s.token()))
- return r
- }
- }
-}
-
-func (s *scanner) scan0() (r bool) {
- s.tok.src = mathutil.MinInt32(s.off, int32(len(s.buf)))
- switch s.c {
- case ' ', '\t', '\r', '\n':
- // White space, formed from spaces (U+0020), horizontal tabs (U+0009), carriage
- // returns (U+000D), and newlines (U+000A), is ignored except as it separates
- // tokens that would otherwise combine into a single token.
- if s.c == '\n' && s.injectSemi() {
- return true
- }
-
- s.next()
- return true
- case '/':
- off := s.off
- s.next()
- switch s.c {
- case '=':
- s.next()
- s.tok.ch = int32(QUO_ASSIGN)
- case '/':
- // Line comments start with the character sequence // and stop at the end of
- // the line.
- s.next()
- s.lineComment(off)
- return true
- case '*':
- // General comments start with the character sequence /* and stop with the
- // first subsequent character sequence */.
- s.next()
- s.generalComment(off)
- return true
- default:
- s.tok.ch = int32(QUO)
- }
- case '(':
- s.tok.ch = int32(LPAREN)
- s.next()
- case ')':
- s.tok.ch = int32(RPAREN)
- s.next()
- case '[':
- s.tok.ch = int32(LBRACK)
- s.next()
- case ']':
- s.tok.ch = int32(RBRACK)
- s.next()
- case '{':
- s.tok.ch = int32(LBRACE)
- s.next()
- case '}':
- s.tok.ch = int32(RBRACE)
- s.next()
- case ',':
- s.tok.ch = int32(COMMA)
- s.next()
- case ';':
- s.tok.ch = int32(SEMICOLON)
- s.next()
- case '~':
- s.tok.ch = int32(TILDE)
- s.next()
- case '"':
- off := s.off
- s.next()
- s.stringLiteral(off)
- case '\'':
- off := s.off
- s.next()
- s.runeLiteral(off)
- case '`':
- s.next()
- for {
- switch {
- case s.c == '`':
- s.next()
- s.tok.ch = int32(STRING)
- return true
- case s.eof:
- s.err(s.off, "raw string literal not terminated")
- s.tok.ch = int32(STRING)
- return true
- case s.c == 0:
- panic(todo("%v: %#U", s.position(), s.c))
- default:
- s.next()
- }
- }
- case '.':
- s.next()
- off := s.off
- if isDigit(s.c) {
- s.dot(false, true)
- return true
- }
-
- if s.c != '.' {
- s.tok.ch = int32(PERIOD)
- return true
- }
-
- s.next()
- if s.c != '.' {
- s.off = off
- s.c = '.'
- s.tok.ch = int32(PERIOD)
- return true
- }
-
- s.next()
- s.tok.ch = int32(ELLIPSIS)
- return true
- case '%':
- s.next()
- switch s.c {
- case '=':
- s.next()
- s.tok.ch = int32(REM_ASSIGN)
- default:
- s.tok.ch = int32(REM)
- }
- case '*':
- s.next()
- switch s.c {
- case '=':
- s.next()
- s.tok.ch = int32(MUL_ASSIGN)
- default:
- s.tok.ch = int32(MUL)
- }
- case '^':
- s.next()
- switch s.c {
- case '=':
- s.next()
- s.tok.ch = int32(XOR_ASSIGN)
- default:
- s.tok.ch = int32(XOR)
- }
- case '+':
- s.next()
- switch s.c {
- case '+':
- s.next()
- s.tok.ch = int32(INC)
- case '=':
- s.next()
- s.tok.ch = int32(ADD_ASSIGN)
- default:
- s.tok.ch = int32(ADD)
- }
- case '-':
- s.next()
- switch s.c {
- case '-':
- s.next()
- s.tok.ch = int32(DEC)
- case '=':
- s.next()
- s.tok.ch = int32(SUB_ASSIGN)
- default:
- s.tok.ch = int32(SUB)
- }
- case ':':
- s.next()
- switch {
- case s.c == '=':
- s.next()
- s.tok.ch = int32(DEFINE)
- default:
- s.tok.ch = int32(COLON)
- }
- case '=':
- s.next()
- switch {
- case s.c == '=':
- s.next()
- s.tok.ch = int32(EQL)
- default:
- s.tok.ch = int32(ASSIGN)
- }
- case '!':
- s.next()
- switch {
- case s.c == '=':
- s.next()
- s.tok.ch = int32(NEQ)
- default:
- s.tok.ch = int32(NOT)
- }
- case '>':
- s.next()
- switch s.c {
- case '=':
- s.next()
- s.tok.ch = int32(GEQ)
- case '>':
- s.next()
- switch s.c {
- case '=':
- s.next()
- s.tok.ch = int32(SHR_ASSIGN)
- default:
- s.tok.ch = int32(SHR)
- }
- default:
- s.tok.ch = int32(GTR)
- }
- case '<':
- s.next()
- switch s.c {
- case '=':
- s.next()
- s.tok.ch = int32(LEQ)
- case '<':
- s.next()
- switch s.c {
- case '=':
- s.next()
- s.tok.ch = int32(SHL_ASSIGN)
- default:
- s.tok.ch = int32(SHL)
- }
- case '-':
- s.next()
- s.tok.ch = int32(ARROW)
- default:
- s.tok.ch = int32(LSS)
- }
- case '|':
- s.next()
- switch s.c {
- case '|':
- s.next()
- s.tok.ch = int32(LOR)
- case '=':
- s.next()
- s.tok.ch = int32(OR_ASSIGN)
- default:
- s.tok.ch = int32(OR)
- }
- case '&':
- s.next()
- switch s.c {
- case '&':
- s.next()
- s.tok.ch = int32(LAND)
- case '^':
- s.next()
- switch s.c {
- case '=':
- s.next()
- s.tok.ch = int32(AND_NOT_ASSIGN)
- default:
- s.tok.ch = int32(AND_NOT)
- }
- case '=':
- s.next()
- s.tok.ch = int32(AND_ASSIGN)
- default:
- s.tok.ch = int32(AND)
- }
- default:
- switch {
- case isIDFirst(s.c):
- s.next()
- s.identifierOrKeyword()
- case isDigit(s.c):
- s.numericLiteral()
- case s.c >= 0x80:
- off := s.off
- switch r := s.rune(); {
- case unicode.IsLetter(r):
- s.identifierOrKeyword()
- case r == 0xfeff:
- if off == 0 { // Ignore BOM, but only at buffer start.
- return true
- }
-
- s.err(off, "illegal byte order mark")
- s.tok.ch = int32(ILLEGAL)
- default:
- s.err(s.off, "illegal character %#U", r)
- s.tok.ch = int32(ILLEGAL)
- }
- case s.eof:
- if s.injectSemi() {
- return true
- }
-
- s.close()
- s.tok.ch = int32(EOF)
- s.tok.sep = mathutil.MinInt32(s.tok.sep, s.tok.src)
- return false
- // case s.c == 0:
- // panic(todo("%v: %#U", s.position(), s.c))
- default:
- s.err(s.off, "illegal character %#U", s.c)
- s.next()
- s.tok.ch = int32(ILLEGAL)
- }
- }
- return true
-}
-
-func (s *scanner) runeLiteral(off int32) {
- // Leading ' consumed.
- ok := 0
- s.tok.ch = int32(CHAR)
- expOff := int32(-1)
- if s.eof {
- s.err(off, "rune literal not terminated")
- return
- }
-
- for {
- switch s.c {
- case '\\':
- ok++
- s.next()
- switch s.c {
- case '\'', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v':
- s.next()
- case 'x', 'X':
- s.next()
- for i := 0; i < 2; i++ {
- if s.c == '\'' {
- if i != 2 {
- s.err(s.off, "illegal character %#U in escape sequence", s.c)
- }
- s.next()
- return
- }
-
- if !isHexDigit(s.c) {
- s.err(s.off, "illegal character %#U in escape sequence", s.c)
- break
- }
- s.next()
- }
- case 'u':
- s.u(4)
- case 'U':
- s.u(8)
- default:
- switch {
- case s.eof:
- s.err(s.base+s.off, "escape sequence not terminated")
- return
- case isOctalDigit(s.c):
- for i := 0; i < 3; i++ {
- s.next()
- if s.c == '\'' {
- if i != 2 {
- s.err(s.off, "illegal character %#U in escape sequence", s.c)
- }
- s.next()
- return
- }
-
- if !isOctalDigit(s.c) {
- s.err(s.off, "illegal character %#U in escape sequence", s.c)
- break
- }
- }
- default:
- s.err(s.off, "unknown escape sequence")
- }
- }
- case '\'':
- s.next()
- if ok != 1 {
- s.err(off, "illegal rune literal")
- }
- return
- case '\t':
- s.next()
- ok++
- default:
- switch {
- case s.eof:
- switch {
- case ok != 0:
- s.err(expOff, "rune literal not terminated")
- default:
- s.err(s.base+s.off, "rune literal not terminated")
- }
- return
- case s.c == 0:
- panic(todo("%v: %#U", s.position(), s.c))
- case s.c < ' ':
- ok++
- s.err(s.off, "non-printable character: %#U", s.c)
- s.next()
- case s.c >= 0x80:
- ok++
- off := s.off
- if c := s.rune(); c == 0xfeff {
- s.err(off, "illegal byte order mark")
- }
- default:
- ok++
- s.next()
- }
- }
- if ok != 0 && expOff < 0 {
- expOff = s.off
- if s.eof {
- expOff++
- }
- }
- }
-}
-
-func (s *scanner) stringLiteral(off int32) {
- // Leadind " consumed.
- s.tok.ch = int32(STRING)
- for {
- switch {
- case s.c == '"':
- s.next()
- return
- case s.c == '\\':
- s.next()
- switch s.c {
- case '"', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v':
- s.next()
- continue
- case 'x', 'X':
- s.next()
- if !isHexDigit(s.c) {
- panic(todo("%v: %#U", s.position(), s.c))
- }
-
- s.next()
- if !isHexDigit(s.c) {
- panic(todo("%v: %#U", s.position(), s.c))
- }
-
- s.next()
- continue
- case 'u':
- s.u(4)
- continue
- case 'U':
- s.u(8)
- continue
- default:
- switch {
- case isOctalDigit(s.c):
- s.next()
- if isOctalDigit(s.c) {
- s.next()
- }
- if isOctalDigit(s.c) {
- s.next()
- }
- continue
- default:
- s.err(off-1, "unknown escape sequence")
- }
- }
- case s.c == '\n':
- fallthrough
- case s.eof:
- s.err(off, "string literal not terminated")
- return
- case s.c == 0:
- s.err(s.off, "illegal character NUL")
- }
-
- switch {
- case s.c >= 0x80:
- off := s.off
- if s.rune() == 0xfeff {
- s.err(off, "illegal byte order mark")
- }
- continue
- }
-
- s.next()
- }
-}
-
-func (s *scanner) u(n int) (r rune) {
- // Leading u/U not consumed.
- s.next()
- off := s.off
- for i := 0; i < n; i++ {
- switch {
- case isHexDigit(s.c):
- var n rune
- switch {
- case s.c >= '0' && s.c <= '9':
- n = rune(s.c) - '0'
- case s.c >= 'a' && s.c <= 'f':
- n = rune(s.c) - 'a' + 10
- case s.c >= 'A' && s.c <= 'F':
- n = rune(s.c) - 'A' + 10
- }
- r = 16*r + n
- default:
- switch {
- case s.eof:
- s.err(s.base+s.off, "escape sequence not terminated")
- default:
- s.err(s.off, "illegal character %#U in escape sequence", s.c)
- }
- return r
- }
-
- s.next()
- }
- if r < 0 || r > unicode.MaxRune || r >= 0xd800 && r <= 0xdfff {
- s.err(off-1, "escape sequence is invalid Unicode code point")
- }
- return r
-}
-
-func (s *scanner) identifierOrKeyword() {
-out:
- for {
- switch {
- case isIDNext(s.c):
- s.next()
- case s.c >= 0x80:
- off := s.off
- c := s.c
- switch r := s.rune(); {
- case unicode.IsLetter(r) || unicode.IsDigit(r):
- // already consumed
- default:
- s.off = off
- s.c = c
- break out
- }
- case s.eof:
- break out
- case s.c == 0:
- s.err(s.off, "illegal character NUL")
- break out
- default:
- break out
- }
- }
- if s.tok.ch = int32(keywords[string(s.buf[s.tok.src:s.off])]); s.tok.ch == 0 {
- s.tok.ch = int32(IDENT)
- }
-}
-
-func (s *scanner) numericLiteral() {
- // Leading decimal digit not consumed.
- var hasHexMantissa, needFrac bool
-more:
- switch s.c {
- case '0':
- s.next()
- switch s.c {
- case '.':
- // nop
- case 'b', 'B':
- s.next()
- s.binaryLiteral()
- return
- case 'e', 'E':
- s.exponent()
- s.tok.ch = int32(FLOAT)
- return
- case 'p', 'P':
- s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
- s.exponent()
- s.tok.ch = int32(FLOAT)
- return
- case 'o', 'O':
- s.next()
- s.octalLiteral()
- return
- case 'x', 'X':
- hasHexMantissa = true
- needFrac = true
- s.tok.ch = int32(INT)
- s.next()
- if s.c == '.' {
- s.next()
- s.dot(hasHexMantissa, needFrac)
- return
- }
-
- if s.hexadecimals() == 0 {
- s.err(s.base+s.off, "hexadecimal literal has no digits")
- return
- }
-
- needFrac = false
- case 'i':
- s.next()
- s.tok.ch = int32(IMAG)
- return
- default:
- invalidOff := int32(-1)
- var invalidDigit byte
- for {
- if s.c == '_' {
- for n := 0; s.c == '_'; n++ {
- if n == 1 {
- s.err(s.off, "'_' must separate successive digits")
- }
- s.next()
- }
- if !isDigit(s.c) {
- s.err(s.off-1, "'_' must separate successive digits")
- }
- }
- if isOctalDigit(s.c) {
- s.next()
- continue
- }
-
- if isDigit(s.c) {
- if invalidOff < 0 {
- invalidOff = s.off
- invalidDigit = s.c
- }
- s.next()
- continue
- }
-
- break
- }
- switch s.c {
- case '.', 'e', 'E', 'i':
- break more
- }
- if isDigit(s.c) {
- break more
- }
- if invalidOff > 0 {
- s.err(invalidOff, "invalid digit '%c' in octal literal", invalidDigit)
- }
- s.tok.ch = int32(INT)
- return
- }
- default:
- s.decimals()
- }
- switch s.c {
- case '.':
- s.next()
- s.dot(hasHexMantissa, needFrac)
- case 'p', 'P':
- if !hasHexMantissa {
- s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
- }
- fallthrough
- case 'e', 'E':
- s.exponent()
- if s.c == 'i' {
- s.next()
- s.tok.ch = int32(IMAG)
- return
- }
-
- s.tok.ch = int32(FLOAT)
- case 'i':
- s.next()
- s.tok.ch = int32(IMAG)
- default:
- s.tok.ch = int32(INT)
- }
-}
-
-func (s *scanner) octalLiteral() {
- // Leading 0o consumed.
- ok := false
- invalidOff := int32(-1)
- var invalidDigit byte
- s.tok.ch = int32(INT)
- for {
- for n := 0; s.c == '_'; n++ {
- if n == 1 {
- s.err(s.off, "'_' must separate successive digits")
- }
- s.next()
- }
- switch s.c {
- case '0', '1', '2', '3', '4', '5', '6', '7':
- s.next()
- ok = true
- case '8', '9':
- if invalidOff < 0 {
- invalidOff = s.off
- invalidDigit = s.c
- }
- s.next()
- case '.':
- s.tok.ch = int32(FLOAT)
- s.err(s.off, "invalid radix point in octal literal")
- s.next()
- case 'e', 'E':
- s.tok.ch = int32(FLOAT)
- s.err(s.off, "'%c' exponent requires decimal mantissa", s.c)
- s.exponent()
- case 'p', 'P':
- s.tok.ch = int32(FLOAT)
- s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
- s.exponent()
- default:
- switch {
- case !ok:
- s.err(s.base+s.off, "octal literal has no digits")
- case invalidOff > 0:
- s.err(invalidOff, "invalid digit '%c' in octal literal", invalidDigit)
- }
- if s.c == 'i' {
- s.next()
- s.tok.ch = int32(IMAG)
- }
- return
- }
- }
-}
-
-func (s *scanner) binaryLiteral() {
- // Leading 0b consumed.
- ok := false
- invalidOff := int32(-1)
- var invalidDigit byte
- s.tok.ch = int32(INT)
- for {
- for n := 0; s.c == '_'; n++ {
- if n == 1 {
- s.err(s.off, "'_' must separate successive digits")
- }
- s.next()
- }
- switch s.c {
- case '0', '1':
- s.next()
- ok = true
- case '.':
- s.tok.ch = int32(FLOAT)
- s.err(s.off, "invalid radix point in binary literal")
- s.next()
- case 'e', 'E':
- s.tok.ch = int32(FLOAT)
- s.err(s.off, "'%c' exponent requires decimal mantissa", s.c)
- s.exponent()
- case 'p', 'P':
- s.tok.ch = int32(FLOAT)
- s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
- s.exponent()
- default:
- if isDigit(s.c) {
- if invalidOff < 0 {
- invalidOff = s.off
- invalidDigit = s.c
- }
- s.next()
- continue
- }
-
- switch {
- case !ok:
- s.err(s.base+s.off, "binary literal has no digits")
- case invalidOff > 0:
- s.err(invalidOff, "invalid digit '%c' in binary literal", invalidDigit)
- }
- if s.c == 'i' {
- s.next()
- s.tok.ch = int32(IMAG)
- }
- return
- }
- }
-}
-
-func (s *scanner) generalComment(off int32) (injectSemi bool) {
- // Leading /* consumed
- off0 := s.off - 2
- var nl bool
- for {
- switch {
- case s.c == '*':
- s.next()
- switch s.c {
- case '/':
- s.lineInfo(off0, s.off+1)
- s.next()
- if nl {
- return s.injectSemi()
- }
-
- return false
- }
- case s.c == '\n':
- nl = true
- s.next()
- case s.eof:
- s.tok.ch = 0
- s.err(off, "comment not terminated")
- return true
- case s.c == 0:
- panic(todo("%v: %#U", s.position(), s.c))
- default:
- s.next()
- }
- }
-}
-
-func (s *scanner) lineComment(off int32) (injectSemi bool) {
- // Leading // consumed
- off0 := s.off - 2
- for {
- switch {
- case s.c == '\n':
- s.lineInfo(off0, s.off+1)
- if s.injectSemi() {
- return true
- }
-
- s.next()
- return false
- case s.c >= 0x80:
- if c := s.rune(); c == 0xfeff {
- s.err(off+2, "illegal byte order mark")
- }
- case s.eof:
- s.off++
- if s.injectSemi() {
- return true
- }
-
- return false
- case s.c == 0:
- return false
- default:
- s.next()
- }
- }
-}
-
-func (s *scanner) lineInfo(off, next int32) {
- if off != 0 && s.buf[off+1] != '*' && s.buf[off-1] != '\n' && s.buf[off-1] != '\r' {
- return
- }
-
- str := s.buf[off:next]
- if !bytes.HasPrefix(str[len("//"):], lineCommentTag) {
- return
- }
-
- switch {
- case str[1] == '*':
- str = str[:len(str)-len("*/")]
- default:
- str = str[:len(str)-len("\n")]
- }
- str = str[len("//"):]
-
- str, ln, ok := s.lineInfoNum(str[len("line "):])
- col := 0
- if ok == liBadNum || ok == liNoNum {
- return
- }
-
- hasCol := false
- var n int
- if str, n, ok = s.lineInfoNum(str); ok == liBadNum {
- return
- }
-
- if ok != liNoNum {
- col = ln
- ln = n
- hasCol = true
- }
-
- fn := strings.TrimSpace(string(str))
- switch {
- case fn == "" && hasCol:
- fn = s.pos(off).Filename
- case fn != "":
- fn = filepath.Clean(fn)
- if !filepath.IsAbs(fn) {
- fn = filepath.Join(s.dir, fn)
- }
- }
- // trc("set %v %q %v %v", next, fn, ln, col)
- s.file.AddLineColumnInfo(int(next), fn, ln, col)
-}
-
-const (
- liNoNum = iota
- liBadNum
- liOK
-)
-
-func (s *scanner) lineInfoNum(str []byte) (_ []byte, n, r int) {
- // trc("==== %q", str)
- x := len(str) - 1
- if x < 0 || !isDigit(str[x]) {
- return str, 0, liNoNum
- }
-
- mul := 1
- for x > 0 && isDigit(str[x]) {
- n += mul * (int(str[x]) - '0')
- mul *= 10
- x--
- if n < 0 {
- return str, 0, liBadNum
- }
- }
- if x < 0 || str[x] != ':' {
- return str, 0, liBadNum
- }
-
- // trc("---- %q %v %v", str[:x], n, liOK)
- return str[:x], n, liOK
-}
-
-func (s *scanner) rune() rune {
- switch r, sz := utf8.DecodeRune(s.buf[s.off:]); {
- case r == utf8.RuneError && sz == 0:
- panic(todo("%v: %#U", s.position(), s.c))
- case r == utf8.RuneError && sz == 1:
- s.err(s.off, "illegal UTF-8 encoding")
- s.next()
- return r
- default:
- s.nextN(sz)
- return r
- }
-}
-
-func (s *scanner) dot(hasHexMantissa, needFrac bool) {
- // '.' already consumed
- switch {
- case hasHexMantissa:
- if s.hexadecimals() == 0 && needFrac {
- s.err(s.off, "hexadecimal literal has no digits")
- }
- switch s.c {
- case 'p', 'P':
- // ok
- default:
- s.err(s.off, "hexadecimal mantissa requires a 'p' exponent")
- }
- default:
- if s.decimals() == 0 && needFrac {
- panic(todo("%v: %#U", s.position(), s.c))
- }
- }
- switch s.c {
- case 'p', 'P':
- if !hasHexMantissa {
- s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
- }
- fallthrough
- case 'e', 'E':
- s.exponent()
- if s.c == 'i' {
- s.next()
- s.tok.ch = int32(IMAG)
- return
- }
-
- s.tok.ch = int32(FLOAT)
- case 'i':
- s.next()
- s.tok.ch = int32(IMAG)
- default:
- s.tok.ch = int32(FLOAT)
- }
-}
-
-func (s *scanner) exponent() {
- // Leanding e or E not consumed.
- s.next()
- switch s.c {
- case '+', '-':
- s.next()
- }
- if !isDigit(s.c) {
- s.err(s.base+s.off, "exponent has no digits")
- return
- }
-
- s.decimals()
-}
-
-func (s *scanner) decimals() (r int) {
- first := true
- for {
- switch {
- case isDigit(s.c):
- first = false
- s.next()
- r++
- case s.c == '_':
- for n := 0; s.c == '_'; n++ {
- if first || n == 1 {
- s.err(s.off, "'_' must separate successive digits")
- }
- s.next()
- }
- if !isDigit(s.c) {
- s.err(s.off-1, "'_' must separate successive digits")
- }
- default:
- return r
- }
- }
-}
-
-func (s *scanner) hexadecimals() (r int) {
- for {
- switch {
- case isHexDigit(s.c):
- s.next()
- r++
- case s.c == '_':
- for n := 0; s.c == '_'; n++ {
- if n == 1 {
- s.err(s.off, "'_' must separate successive digits")
- }
- s.next()
- }
- if !isHexDigit(s.c) {
- s.err(s.off-1, "'_' must separate successive digits")
- }
- default:
- return r
- }
- }
-}
-
-// When the input is broken into tokens, a semicolon is automatically inserted
-// into the token stream immediately after a line's final token if that token
-// is
-//
-// - an identifier
-// - an integer, floating-point, imaginary, rune, or string literal
-// - one of the keywords break, continue, fallthrough, or return
-// - one of the operators and punctuation ++, --, ), ], or }
-func (s *scanner) injectSemi() bool {
- switch token.Token(s.last) {
- case
- IDENT, INT, FLOAT, IMAG, CHAR, STRING,
- BREAK, CONTINUE, FALLTHROUGH, RETURN,
- INC, DEC, RPAREN, RBRACK, RBRACE:
-
- s.tok.ch = int32(SEMICOLON)
- s.last = 0
- if s.c == '\n' {
- s.next()
- }
- return true
- }
-
- s.last = 0
- return false
-}