diff options
Diffstat (limited to 'vendor/modernc.org/gc/v3/scanner.go')
-rw-r--r-- | vendor/modernc.org/gc/v3/scanner.go | 1446 |
1 files changed, 0 insertions, 1446 deletions
diff --git a/vendor/modernc.org/gc/v3/scanner.go b/vendor/modernc.org/gc/v3/scanner.go deleted file mode 100644 index a3815a8f8..000000000 --- a/vendor/modernc.org/gc/v3/scanner.go +++ /dev/null @@ -1,1446 +0,0 @@ -// Copyright 2022 The Gc Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package gc // import "modernc.org/gc/v3" - -import ( - "bytes" - "fmt" - "go/token" - "path/filepath" - "strings" - "unicode" - "unicode/utf8" - - "modernc.org/mathutil" - mtoken "modernc.org/token" -) - -var ( - _ Node = (*Token)(nil) - _ Node = (*nonode)(nil) - - keywords = map[string]token.Token{ - "break": BREAK, - "case": CASE, - "chan": CHAN, - "const": CONST, - "continue": CONTINUE, - "default": DEFAULT, - "defer": DEFER, - "else": ELSE, - "fallthrough": FALLTHROUGH, - "for": FOR, - "func": FUNC, - "go": GO, - "goto": GOTO, - "if": IF, - "import": IMPORT, - "interface": INTERFACE, - "map": MAP, - "package": PACKAGE, - "range": RANGE, - "return": RETURN, - "select": SELECT, - "struct": STRUCT, - "switch": SWITCH, - "type": TYPE, - "var": VAR, - } - - lineCommentTag = []byte("line ") - znode = &nonode{} -) - -type nonode struct{} - -func (*nonode) Position() (r token.Position) { return r } -func (*nonode) Source(full bool) string { return "" } - -// Token represents a lexeme, its position and its semantic value. -type Token struct { // 16 bytes on 64 bit arch - source *source - - ch int32 - index int32 -} - -// Ch returns which token t represents -func (t Token) Ch() token.Token { return token.Token(t.ch) } - -// Source implements Node. -func (t Token) Source(full bool) string { - // trc("%10s %v: #%v sep %v, src %v, buf %v", tokSource(t.Ch()), t.Position(), t.index, t.source.toks[t.index].sep, t.source.toks[t.index].src, len(t.source.buf)) - sep := t.Sep() - if !full && sep != "" { - sep = " " - } - src := t.Src() - if !full && strings.ContainsRune(src, '\n') { - src = " " - } - // trc("%q %q -> %q %q", t.Sep(), t.Src(), sep, src) - return sep + src -} - -// Positions implements Node. -func (t Token) Position() (r token.Position) { - if t.source == nil { - return r - } - - s := t.source - off := mathutil.MinInt32(int32(len(s.buf)), s.toks[t.index].src) - return token.Position(s.file.PositionFor(mtoken.Pos(s.base+off), true)) -} - -// Prev returns the token preceding t or a zero value if no such token exists. -func (t Token) Prev() (r Token) { - if index := t.index - 1; index >= 0 { - s := t.source - return Token{source: s, ch: s.toks[index].ch, index: index} - } - - return r -} - -// Next returns the token following t or a zero value if no such token exists. -func (t Token) Next() (r Token) { - if index := t.index + 1; index < int32(len(t.source.toks)) { - s := t.source - return Token{source: s, ch: s.toks[index].ch, index: index} - } - - return r -} - -// Sep returns any separators, combined, preceding t. -func (t Token) Sep() string { - s := t.source - if p, ok := s.sepPatches[t.index]; ok { - return p - } - - return string(s.buf[s.toks[t.index].sep:s.toks[t.index].src]) -} - -// SetSep sets t's separator. -func (t Token) SetSep(s string) { - src := t.source - if src.sepPatches == nil { - src.sepPatches = map[int32]string{} - } - src.sepPatches[t.index] = s -} - -// Src returns t's source form. -func (t Token) Src() string { - s := t.source - if p, ok := s.srcPatches[t.index]; ok { - return p - } - - if t.ch != int32(EOF) { - next := t.source.off - if t.index < int32(len(s.toks))-1 { - next = s.toks[t.index+1].sep - } - return string(s.buf[s.toks[t.index].src:next]) - } - - return "" -} - -// SetSrc sets t's source form. -func (t Token) SetSrc(s string) { - src := t.source - if src.srcPatches == nil { - src.srcPatches = map[int32]string{} - } - src.srcPatches[t.index] = s -} - -// IsValid reports t is a valid token. Zero value reports false. -func (t Token) IsValid() bool { return t.source != nil } - -type tok struct { // 12 bytes - ch int32 - sep int32 - src int32 -} - -func (t *tok) token() token.Token { return token.Token(t.ch) } - -func (t *tok) position(s *source) (r token.Position) { - off := mathutil.MinInt32(int32(len(s.buf)), t.src) - return token.Position(s.file.PositionFor(mtoken.Pos(s.base+off), true)) -} - -// source represents a single Go source file, editor text buffer etc. -type source struct { - buf []byte - file *mtoken.File - name string - sepPatches map[int32]string - srcPatches map[int32]string - toks []tok - - base int32 - off int32 -} - -// 'buf' becomes owned by the result and must not be modified afterwards. -func newSource(name string, buf []byte) *source { - file := mtoken.NewFile(name, len(buf)) - return &source{ - buf: buf, - file: file, - name: name, - base: int32(file.Base()), - } -} - -type ErrWithPosition struct { - pos token.Position - err error -} - -func (e ErrWithPosition) String() string { - switch { - case e.pos.IsValid(): - return fmt.Sprintf("%v: %v", e.pos, e.err) - default: - return fmt.Sprintf("%v", e.err) - } -} - -type errList []ErrWithPosition - -func (e errList) Err() (r error) { - if len(e) == 0 { - return nil - } - - return e -} - -func (e errList) Error() string { - w := 0 - prev := ErrWithPosition{pos: token.Position{Offset: -1}} - for _, v := range e { - if v.pos.Line == 0 || v.pos.Offset != prev.pos.Offset || v.err.Error() != prev.err.Error() { - e[w] = v - w++ - prev = v - } - } - - var a []string - for _, v := range e { - a = append(a, fmt.Sprint(v)) - } - return strings.Join(a, "\n") -} - -func (e *errList) err(pos token.Position, msg string, args ...interface{}) { - if trcErrors { - trc("FAIL "+msg, args...) - } - switch { - case len(args) == 0: - *e = append(*e, ErrWithPosition{pos, fmt.Errorf("%s", msg)}) - default: - *e = append(*e, ErrWithPosition{pos, fmt.Errorf(msg, args...)}) - } -} - -type scanner struct { - *source - dir string - errs errList - tok tok - - last int32 - - errBudget int - - c byte // Lookahead byte. - - eof bool - isClosed bool -} - -func newScanner(name string, buf []byte) *scanner { - dir, _ := filepath.Split(name) - r := &scanner{source: newSource(name, buf), errBudget: 10, dir: dir} - switch { - case len(buf) == 0: - r.eof = true - default: - r.c = buf[0] - if r.c == '\n' { - r.file.AddLine(int(r.base + r.off)) - } - } - return r -} - -func isDigit(c byte) bool { return c >= '0' && c <= '9' } -func isHexDigit(c byte) bool { return isDigit(c) || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F' } -func isIDNext(c byte) bool { return isIDFirst(c) || isDigit(c) } -func isOctalDigit(c byte) bool { return c >= '0' && c <= '7' } - -func isIDFirst(c byte) bool { - return c >= 'a' && c <= 'z' || - c >= 'A' && c <= 'Z' || - c == '_' -} - -func (s *scanner) position() token.Position { - return token.Position(s.source.file.PositionFor(mtoken.Pos(s.base+s.off), true)) -} - -func (s *scanner) pos(off int32) token.Position { - return token.Position(s.file.PositionFor(mtoken.Pos(s.base+off), true)) -} - -func (s *scanner) token() Token { - return Token{source: s.source, ch: s.tok.ch, index: int32(len(s.toks) - 1)} -} - -func (s *scanner) err(off int32, msg string, args ...interface{}) { - if s.errBudget <= 0 { - s.close() - return - } - - s.errBudget-- - if n := int32(len(s.buf)); off >= n { - off = n - } - s.errs.err(s.pos(off), msg, args...) -} - -func (s *scanner) close() { - if s.isClosed { - return - } - - s.tok.ch = int32(ILLEGAL) - s.eof = true - s.isClosed = true -} - -func (s *scanner) next() { - if s.eof { - return - } - - s.off++ - if int(s.off) == len(s.buf) { - s.c = 0 - s.eof = true - return - } - - s.c = s.buf[s.off] - if s.c == '\n' { - s.file.AddLine(int(s.base + s.off)) - } -} - -func (s *scanner) nextN(n int) { - if int(s.off) == len(s.buf)-n { - s.c = 0 - s.eof = true - return - } - - s.off += int32(n) - s.c = s.buf[s.off] - if s.c == '\n' { - s.file.AddLine(int(s.base + s.off)) - } -} - -func (s *scanner) scan() (r bool) { - if s.isClosed { - return false - } - - s.last = s.tok.ch - s.tok.sep = s.off - s.tok.ch = -1 - for { - if r = s.scan0(); !r || s.tok.ch >= 0 { - s.toks = append(s.toks, s.tok) - // trc("", dump(s.token())) - return r - } - } -} - -func (s *scanner) scan0() (r bool) { - s.tok.src = mathutil.MinInt32(s.off, int32(len(s.buf))) - switch s.c { - case ' ', '\t', '\r', '\n': - // White space, formed from spaces (U+0020), horizontal tabs (U+0009), carriage - // returns (U+000D), and newlines (U+000A), is ignored except as it separates - // tokens that would otherwise combine into a single token. - if s.c == '\n' && s.injectSemi() { - return true - } - - s.next() - return true - case '/': - off := s.off - s.next() - switch s.c { - case '=': - s.next() - s.tok.ch = int32(QUO_ASSIGN) - case '/': - // Line comments start with the character sequence // and stop at the end of - // the line. - s.next() - s.lineComment(off) - return true - case '*': - // General comments start with the character sequence /* and stop with the - // first subsequent character sequence */. - s.next() - s.generalComment(off) - return true - default: - s.tok.ch = int32(QUO) - } - case '(': - s.tok.ch = int32(LPAREN) - s.next() - case ')': - s.tok.ch = int32(RPAREN) - s.next() - case '[': - s.tok.ch = int32(LBRACK) - s.next() - case ']': - s.tok.ch = int32(RBRACK) - s.next() - case '{': - s.tok.ch = int32(LBRACE) - s.next() - case '}': - s.tok.ch = int32(RBRACE) - s.next() - case ',': - s.tok.ch = int32(COMMA) - s.next() - case ';': - s.tok.ch = int32(SEMICOLON) - s.next() - case '~': - s.tok.ch = int32(TILDE) - s.next() - case '"': - off := s.off - s.next() - s.stringLiteral(off) - case '\'': - off := s.off - s.next() - s.runeLiteral(off) - case '`': - s.next() - for { - switch { - case s.c == '`': - s.next() - s.tok.ch = int32(STRING) - return true - case s.eof: - s.err(s.off, "raw string literal not terminated") - s.tok.ch = int32(STRING) - return true - case s.c == 0: - panic(todo("%v: %#U", s.position(), s.c)) - default: - s.next() - } - } - case '.': - s.next() - off := s.off - if isDigit(s.c) { - s.dot(false, true) - return true - } - - if s.c != '.' { - s.tok.ch = int32(PERIOD) - return true - } - - s.next() - if s.c != '.' { - s.off = off - s.c = '.' - s.tok.ch = int32(PERIOD) - return true - } - - s.next() - s.tok.ch = int32(ELLIPSIS) - return true - case '%': - s.next() - switch s.c { - case '=': - s.next() - s.tok.ch = int32(REM_ASSIGN) - default: - s.tok.ch = int32(REM) - } - case '*': - s.next() - switch s.c { - case '=': - s.next() - s.tok.ch = int32(MUL_ASSIGN) - default: - s.tok.ch = int32(MUL) - } - case '^': - s.next() - switch s.c { - case '=': - s.next() - s.tok.ch = int32(XOR_ASSIGN) - default: - s.tok.ch = int32(XOR) - } - case '+': - s.next() - switch s.c { - case '+': - s.next() - s.tok.ch = int32(INC) - case '=': - s.next() - s.tok.ch = int32(ADD_ASSIGN) - default: - s.tok.ch = int32(ADD) - } - case '-': - s.next() - switch s.c { - case '-': - s.next() - s.tok.ch = int32(DEC) - case '=': - s.next() - s.tok.ch = int32(SUB_ASSIGN) - default: - s.tok.ch = int32(SUB) - } - case ':': - s.next() - switch { - case s.c == '=': - s.next() - s.tok.ch = int32(DEFINE) - default: - s.tok.ch = int32(COLON) - } - case '=': - s.next() - switch { - case s.c == '=': - s.next() - s.tok.ch = int32(EQL) - default: - s.tok.ch = int32(ASSIGN) - } - case '!': - s.next() - switch { - case s.c == '=': - s.next() - s.tok.ch = int32(NEQ) - default: - s.tok.ch = int32(NOT) - } - case '>': - s.next() - switch s.c { - case '=': - s.next() - s.tok.ch = int32(GEQ) - case '>': - s.next() - switch s.c { - case '=': - s.next() - s.tok.ch = int32(SHR_ASSIGN) - default: - s.tok.ch = int32(SHR) - } - default: - s.tok.ch = int32(GTR) - } - case '<': - s.next() - switch s.c { - case '=': - s.next() - s.tok.ch = int32(LEQ) - case '<': - s.next() - switch s.c { - case '=': - s.next() - s.tok.ch = int32(SHL_ASSIGN) - default: - s.tok.ch = int32(SHL) - } - case '-': - s.next() - s.tok.ch = int32(ARROW) - default: - s.tok.ch = int32(LSS) - } - case '|': - s.next() - switch s.c { - case '|': - s.next() - s.tok.ch = int32(LOR) - case '=': - s.next() - s.tok.ch = int32(OR_ASSIGN) - default: - s.tok.ch = int32(OR) - } - case '&': - s.next() - switch s.c { - case '&': - s.next() - s.tok.ch = int32(LAND) - case '^': - s.next() - switch s.c { - case '=': - s.next() - s.tok.ch = int32(AND_NOT_ASSIGN) - default: - s.tok.ch = int32(AND_NOT) - } - case '=': - s.next() - s.tok.ch = int32(AND_ASSIGN) - default: - s.tok.ch = int32(AND) - } - default: - switch { - case isIDFirst(s.c): - s.next() - s.identifierOrKeyword() - case isDigit(s.c): - s.numericLiteral() - case s.c >= 0x80: - off := s.off - switch r := s.rune(); { - case unicode.IsLetter(r): - s.identifierOrKeyword() - case r == 0xfeff: - if off == 0 { // Ignore BOM, but only at buffer start. - return true - } - - s.err(off, "illegal byte order mark") - s.tok.ch = int32(ILLEGAL) - default: - s.err(s.off, "illegal character %#U", r) - s.tok.ch = int32(ILLEGAL) - } - case s.eof: - if s.injectSemi() { - return true - } - - s.close() - s.tok.ch = int32(EOF) - s.tok.sep = mathutil.MinInt32(s.tok.sep, s.tok.src) - return false - // case s.c == 0: - // panic(todo("%v: %#U", s.position(), s.c)) - default: - s.err(s.off, "illegal character %#U", s.c) - s.next() - s.tok.ch = int32(ILLEGAL) - } - } - return true -} - -func (s *scanner) runeLiteral(off int32) { - // Leading ' consumed. - ok := 0 - s.tok.ch = int32(CHAR) - expOff := int32(-1) - if s.eof { - s.err(off, "rune literal not terminated") - return - } - - for { - switch s.c { - case '\\': - ok++ - s.next() - switch s.c { - case '\'', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v': - s.next() - case 'x', 'X': - s.next() - for i := 0; i < 2; i++ { - if s.c == '\'' { - if i != 2 { - s.err(s.off, "illegal character %#U in escape sequence", s.c) - } - s.next() - return - } - - if !isHexDigit(s.c) { - s.err(s.off, "illegal character %#U in escape sequence", s.c) - break - } - s.next() - } - case 'u': - s.u(4) - case 'U': - s.u(8) - default: - switch { - case s.eof: - s.err(s.base+s.off, "escape sequence not terminated") - return - case isOctalDigit(s.c): - for i := 0; i < 3; i++ { - s.next() - if s.c == '\'' { - if i != 2 { - s.err(s.off, "illegal character %#U in escape sequence", s.c) - } - s.next() - return - } - - if !isOctalDigit(s.c) { - s.err(s.off, "illegal character %#U in escape sequence", s.c) - break - } - } - default: - s.err(s.off, "unknown escape sequence") - } - } - case '\'': - s.next() - if ok != 1 { - s.err(off, "illegal rune literal") - } - return - case '\t': - s.next() - ok++ - default: - switch { - case s.eof: - switch { - case ok != 0: - s.err(expOff, "rune literal not terminated") - default: - s.err(s.base+s.off, "rune literal not terminated") - } - return - case s.c == 0: - panic(todo("%v: %#U", s.position(), s.c)) - case s.c < ' ': - ok++ - s.err(s.off, "non-printable character: %#U", s.c) - s.next() - case s.c >= 0x80: - ok++ - off := s.off - if c := s.rune(); c == 0xfeff { - s.err(off, "illegal byte order mark") - } - default: - ok++ - s.next() - } - } - if ok != 0 && expOff < 0 { - expOff = s.off - if s.eof { - expOff++ - } - } - } -} - -func (s *scanner) stringLiteral(off int32) { - // Leadind " consumed. - s.tok.ch = int32(STRING) - for { - switch { - case s.c == '"': - s.next() - return - case s.c == '\\': - s.next() - switch s.c { - case '"', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v': - s.next() - continue - case 'x', 'X': - s.next() - if !isHexDigit(s.c) { - panic(todo("%v: %#U", s.position(), s.c)) - } - - s.next() - if !isHexDigit(s.c) { - panic(todo("%v: %#U", s.position(), s.c)) - } - - s.next() - continue - case 'u': - s.u(4) - continue - case 'U': - s.u(8) - continue - default: - switch { - case isOctalDigit(s.c): - s.next() - if isOctalDigit(s.c) { - s.next() - } - if isOctalDigit(s.c) { - s.next() - } - continue - default: - s.err(off-1, "unknown escape sequence") - } - } - case s.c == '\n': - fallthrough - case s.eof: - s.err(off, "string literal not terminated") - return - case s.c == 0: - s.err(s.off, "illegal character NUL") - } - - switch { - case s.c >= 0x80: - off := s.off - if s.rune() == 0xfeff { - s.err(off, "illegal byte order mark") - } - continue - } - - s.next() - } -} - -func (s *scanner) u(n int) (r rune) { - // Leading u/U not consumed. - s.next() - off := s.off - for i := 0; i < n; i++ { - switch { - case isHexDigit(s.c): - var n rune - switch { - case s.c >= '0' && s.c <= '9': - n = rune(s.c) - '0' - case s.c >= 'a' && s.c <= 'f': - n = rune(s.c) - 'a' + 10 - case s.c >= 'A' && s.c <= 'F': - n = rune(s.c) - 'A' + 10 - } - r = 16*r + n - default: - switch { - case s.eof: - s.err(s.base+s.off, "escape sequence not terminated") - default: - s.err(s.off, "illegal character %#U in escape sequence", s.c) - } - return r - } - - s.next() - } - if r < 0 || r > unicode.MaxRune || r >= 0xd800 && r <= 0xdfff { - s.err(off-1, "escape sequence is invalid Unicode code point") - } - return r -} - -func (s *scanner) identifierOrKeyword() { -out: - for { - switch { - case isIDNext(s.c): - s.next() - case s.c >= 0x80: - off := s.off - c := s.c - switch r := s.rune(); { - case unicode.IsLetter(r) || unicode.IsDigit(r): - // already consumed - default: - s.off = off - s.c = c - break out - } - case s.eof: - break out - case s.c == 0: - s.err(s.off, "illegal character NUL") - break out - default: - break out - } - } - if s.tok.ch = int32(keywords[string(s.buf[s.tok.src:s.off])]); s.tok.ch == 0 { - s.tok.ch = int32(IDENT) - } -} - -func (s *scanner) numericLiteral() { - // Leading decimal digit not consumed. - var hasHexMantissa, needFrac bool -more: - switch s.c { - case '0': - s.next() - switch s.c { - case '.': - // nop - case 'b', 'B': - s.next() - s.binaryLiteral() - return - case 'e', 'E': - s.exponent() - s.tok.ch = int32(FLOAT) - return - case 'p', 'P': - s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c) - s.exponent() - s.tok.ch = int32(FLOAT) - return - case 'o', 'O': - s.next() - s.octalLiteral() - return - case 'x', 'X': - hasHexMantissa = true - needFrac = true - s.tok.ch = int32(INT) - s.next() - if s.c == '.' { - s.next() - s.dot(hasHexMantissa, needFrac) - return - } - - if s.hexadecimals() == 0 { - s.err(s.base+s.off, "hexadecimal literal has no digits") - return - } - - needFrac = false - case 'i': - s.next() - s.tok.ch = int32(IMAG) - return - default: - invalidOff := int32(-1) - var invalidDigit byte - for { - if s.c == '_' { - for n := 0; s.c == '_'; n++ { - if n == 1 { - s.err(s.off, "'_' must separate successive digits") - } - s.next() - } - if !isDigit(s.c) { - s.err(s.off-1, "'_' must separate successive digits") - } - } - if isOctalDigit(s.c) { - s.next() - continue - } - - if isDigit(s.c) { - if invalidOff < 0 { - invalidOff = s.off - invalidDigit = s.c - } - s.next() - continue - } - - break - } - switch s.c { - case '.', 'e', 'E', 'i': - break more - } - if isDigit(s.c) { - break more - } - if invalidOff > 0 { - s.err(invalidOff, "invalid digit '%c' in octal literal", invalidDigit) - } - s.tok.ch = int32(INT) - return - } - default: - s.decimals() - } - switch s.c { - case '.': - s.next() - s.dot(hasHexMantissa, needFrac) - case 'p', 'P': - if !hasHexMantissa { - s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c) - } - fallthrough - case 'e', 'E': - s.exponent() - if s.c == 'i' { - s.next() - s.tok.ch = int32(IMAG) - return - } - - s.tok.ch = int32(FLOAT) - case 'i': - s.next() - s.tok.ch = int32(IMAG) - default: - s.tok.ch = int32(INT) - } -} - -func (s *scanner) octalLiteral() { - // Leading 0o consumed. - ok := false - invalidOff := int32(-1) - var invalidDigit byte - s.tok.ch = int32(INT) - for { - for n := 0; s.c == '_'; n++ { - if n == 1 { - s.err(s.off, "'_' must separate successive digits") - } - s.next() - } - switch s.c { - case '0', '1', '2', '3', '4', '5', '6', '7': - s.next() - ok = true - case '8', '9': - if invalidOff < 0 { - invalidOff = s.off - invalidDigit = s.c - } - s.next() - case '.': - s.tok.ch = int32(FLOAT) - s.err(s.off, "invalid radix point in octal literal") - s.next() - case 'e', 'E': - s.tok.ch = int32(FLOAT) - s.err(s.off, "'%c' exponent requires decimal mantissa", s.c) - s.exponent() - case 'p', 'P': - s.tok.ch = int32(FLOAT) - s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c) - s.exponent() - default: - switch { - case !ok: - s.err(s.base+s.off, "octal literal has no digits") - case invalidOff > 0: - s.err(invalidOff, "invalid digit '%c' in octal literal", invalidDigit) - } - if s.c == 'i' { - s.next() - s.tok.ch = int32(IMAG) - } - return - } - } -} - -func (s *scanner) binaryLiteral() { - // Leading 0b consumed. - ok := false - invalidOff := int32(-1) - var invalidDigit byte - s.tok.ch = int32(INT) - for { - for n := 0; s.c == '_'; n++ { - if n == 1 { - s.err(s.off, "'_' must separate successive digits") - } - s.next() - } - switch s.c { - case '0', '1': - s.next() - ok = true - case '.': - s.tok.ch = int32(FLOAT) - s.err(s.off, "invalid radix point in binary literal") - s.next() - case 'e', 'E': - s.tok.ch = int32(FLOAT) - s.err(s.off, "'%c' exponent requires decimal mantissa", s.c) - s.exponent() - case 'p', 'P': - s.tok.ch = int32(FLOAT) - s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c) - s.exponent() - default: - if isDigit(s.c) { - if invalidOff < 0 { - invalidOff = s.off - invalidDigit = s.c - } - s.next() - continue - } - - switch { - case !ok: - s.err(s.base+s.off, "binary literal has no digits") - case invalidOff > 0: - s.err(invalidOff, "invalid digit '%c' in binary literal", invalidDigit) - } - if s.c == 'i' { - s.next() - s.tok.ch = int32(IMAG) - } - return - } - } -} - -func (s *scanner) generalComment(off int32) (injectSemi bool) { - // Leading /* consumed - off0 := s.off - 2 - var nl bool - for { - switch { - case s.c == '*': - s.next() - switch s.c { - case '/': - s.lineInfo(off0, s.off+1) - s.next() - if nl { - return s.injectSemi() - } - - return false - } - case s.c == '\n': - nl = true - s.next() - case s.eof: - s.tok.ch = 0 - s.err(off, "comment not terminated") - return true - case s.c == 0: - panic(todo("%v: %#U", s.position(), s.c)) - default: - s.next() - } - } -} - -func (s *scanner) lineComment(off int32) (injectSemi bool) { - // Leading // consumed - off0 := s.off - 2 - for { - switch { - case s.c == '\n': - s.lineInfo(off0, s.off+1) - if s.injectSemi() { - return true - } - - s.next() - return false - case s.c >= 0x80: - if c := s.rune(); c == 0xfeff { - s.err(off+2, "illegal byte order mark") - } - case s.eof: - s.off++ - if s.injectSemi() { - return true - } - - return false - case s.c == 0: - return false - default: - s.next() - } - } -} - -func (s *scanner) lineInfo(off, next int32) { - if off != 0 && s.buf[off+1] != '*' && s.buf[off-1] != '\n' && s.buf[off-1] != '\r' { - return - } - - str := s.buf[off:next] - if !bytes.HasPrefix(str[len("//"):], lineCommentTag) { - return - } - - switch { - case str[1] == '*': - str = str[:len(str)-len("*/")] - default: - str = str[:len(str)-len("\n")] - } - str = str[len("//"):] - - str, ln, ok := s.lineInfoNum(str[len("line "):]) - col := 0 - if ok == liBadNum || ok == liNoNum { - return - } - - hasCol := false - var n int - if str, n, ok = s.lineInfoNum(str); ok == liBadNum { - return - } - - if ok != liNoNum { - col = ln - ln = n - hasCol = true - } - - fn := strings.TrimSpace(string(str)) - switch { - case fn == "" && hasCol: - fn = s.pos(off).Filename - case fn != "": - fn = filepath.Clean(fn) - if !filepath.IsAbs(fn) { - fn = filepath.Join(s.dir, fn) - } - } - // trc("set %v %q %v %v", next, fn, ln, col) - s.file.AddLineColumnInfo(int(next), fn, ln, col) -} - -const ( - liNoNum = iota - liBadNum - liOK -) - -func (s *scanner) lineInfoNum(str []byte) (_ []byte, n, r int) { - // trc("==== %q", str) - x := len(str) - 1 - if x < 0 || !isDigit(str[x]) { - return str, 0, liNoNum - } - - mul := 1 - for x > 0 && isDigit(str[x]) { - n += mul * (int(str[x]) - '0') - mul *= 10 - x-- - if n < 0 { - return str, 0, liBadNum - } - } - if x < 0 || str[x] != ':' { - return str, 0, liBadNum - } - - // trc("---- %q %v %v", str[:x], n, liOK) - return str[:x], n, liOK -} - -func (s *scanner) rune() rune { - switch r, sz := utf8.DecodeRune(s.buf[s.off:]); { - case r == utf8.RuneError && sz == 0: - panic(todo("%v: %#U", s.position(), s.c)) - case r == utf8.RuneError && sz == 1: - s.err(s.off, "illegal UTF-8 encoding") - s.next() - return r - default: - s.nextN(sz) - return r - } -} - -func (s *scanner) dot(hasHexMantissa, needFrac bool) { - // '.' already consumed - switch { - case hasHexMantissa: - if s.hexadecimals() == 0 && needFrac { - s.err(s.off, "hexadecimal literal has no digits") - } - switch s.c { - case 'p', 'P': - // ok - default: - s.err(s.off, "hexadecimal mantissa requires a 'p' exponent") - } - default: - if s.decimals() == 0 && needFrac { - panic(todo("%v: %#U", s.position(), s.c)) - } - } - switch s.c { - case 'p', 'P': - if !hasHexMantissa { - s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c) - } - fallthrough - case 'e', 'E': - s.exponent() - if s.c == 'i' { - s.next() - s.tok.ch = int32(IMAG) - return - } - - s.tok.ch = int32(FLOAT) - case 'i': - s.next() - s.tok.ch = int32(IMAG) - default: - s.tok.ch = int32(FLOAT) - } -} - -func (s *scanner) exponent() { - // Leanding e or E not consumed. - s.next() - switch s.c { - case '+', '-': - s.next() - } - if !isDigit(s.c) { - s.err(s.base+s.off, "exponent has no digits") - return - } - - s.decimals() -} - -func (s *scanner) decimals() (r int) { - first := true - for { - switch { - case isDigit(s.c): - first = false - s.next() - r++ - case s.c == '_': - for n := 0; s.c == '_'; n++ { - if first || n == 1 { - s.err(s.off, "'_' must separate successive digits") - } - s.next() - } - if !isDigit(s.c) { - s.err(s.off-1, "'_' must separate successive digits") - } - default: - return r - } - } -} - -func (s *scanner) hexadecimals() (r int) { - for { - switch { - case isHexDigit(s.c): - s.next() - r++ - case s.c == '_': - for n := 0; s.c == '_'; n++ { - if n == 1 { - s.err(s.off, "'_' must separate successive digits") - } - s.next() - } - if !isHexDigit(s.c) { - s.err(s.off-1, "'_' must separate successive digits") - } - default: - return r - } - } -} - -// When the input is broken into tokens, a semicolon is automatically inserted -// into the token stream immediately after a line's final token if that token -// is -// -// - an identifier -// - an integer, floating-point, imaginary, rune, or string literal -// - one of the keywords break, continue, fallthrough, or return -// - one of the operators and punctuation ++, --, ), ], or } -func (s *scanner) injectSemi() bool { - switch token.Token(s.last) { - case - IDENT, INT, FLOAT, IMAG, CHAR, STRING, - BREAK, CONTINUE, FALLTHROUGH, RETURN, - INC, DEC, RPAREN, RBRACK, RBRACE: - - s.tok.ch = int32(SEMICOLON) - s.last = 0 - if s.c == '\n' { - s.next() - } - return true - } - - s.last = 0 - return false -} |