Diffstat (limited to 'vendor/github.com/temoto/robotstxt/scanner.go')
-rw-r--r--   vendor/github.com/temoto/robotstxt/scanner.go   185
1 file changed, 0 insertions(+), 185 deletions(-)
diff --git a/vendor/github.com/temoto/robotstxt/scanner.go b/vendor/github.com/temoto/robotstxt/scanner.go
deleted file mode 100644
index 6bd98c2ec..000000000
--- a/vendor/github.com/temoto/robotstxt/scanner.go
+++ /dev/null
@@ -1,185 +0,0 @@
-package robotstxt
-
-import (
- "bytes"
- "fmt"
- "go/token"
- "os"
- "sync"
- "unicode/utf8"
-)
-
-type byteScanner struct {
- pos token.Position
- buf []byte
- ErrorCount int
- ch rune
- Quiet bool
- keyTokenFound bool
- lastChunk bool
-}
-
-const tokEOL = "\n"
-
-var WhitespaceChars = []rune{' ', '\t', '\v'}
-var tokBuffers = sync.Pool{New: func() interface{} { return bytes.NewBuffer(make([]byte, 32)) }}
-
-func newByteScanner(srcname string, quiet bool) *byteScanner {
- return &byteScanner{
- Quiet: quiet,
- ch: -1,
- pos: token.Position{Filename: srcname},
- }
-}
-
-func (s *byteScanner) feed(input []byte, end bool) {
- s.buf = input
- s.pos.Offset = 0
- s.pos.Line = 1
- s.pos.Column = 1
- s.lastChunk = end
-
- // Read first char into look-ahead buffer `s.ch`.
- if !s.nextChar() {
- return
- }
-
- // Skip UTF-8 byte order mark
- if s.ch == 65279 {
- s.nextChar()
- s.pos.Column = 1
- }
-}
-
-func (s *byteScanner) GetPosition() token.Position {
- return s.pos
-}
-
-func (s *byteScanner) scan() string {
- // Note Offset > len, not >=, so we can scan last character.
- if s.lastChunk && s.pos.Offset > len(s.buf) {
- return ""
- }
-
- s.skipSpace()
-
- if s.ch == -1 {
- return ""
- }
-
- // EOL
- if s.isEol() {
- s.keyTokenFound = false
- // skip subsequent newline chars
- for s.ch != -1 && s.isEol() {
- s.nextChar()
- }
- // emit newline as separate token
- return tokEOL
- }
-
- // skip comments
- if s.ch == '#' {
- s.keyTokenFound = false
- s.skipUntilEol()
- if s.ch == -1 {
- return ""
- }
- // emit newline as separate token
- return tokEOL
- }
-
- // else we found something
- tok := tokBuffers.Get().(*bytes.Buffer)
- defer tokBuffers.Put(tok)
- tok.Reset()
- tok.WriteRune(s.ch)
- s.nextChar()
- for s.ch != -1 && !s.isSpace() && !s.isEol() {
- // Do not consider ":" to be a token separator if a first key token
- // has already been found on this line (avoid cutting an absolute URL
- // after the "http:")
- if s.ch == ':' && !s.keyTokenFound {
- s.nextChar()
- s.keyTokenFound = true
- break
- }
-
- tok.WriteRune(s.ch)
- s.nextChar()
- }
- return tok.String()
-}
-
-func (s *byteScanner) scanAll() []string {
- results := make([]string, 0, 64) // random guess of average tokens length
- for {
- token := s.scan()
- if token != "" {
- results = append(results, token)
- } else {
- break
- }
- }
- return results
-}
-
-func (s *byteScanner) error(pos token.Position, msg string) {
- s.ErrorCount++
- if !s.Quiet {
- fmt.Fprintf(os.Stderr, "robotstxt from %s: %s\n", pos.String(), msg)
- }
-}
-
-func (s *byteScanner) isEol() bool {
- return s.ch == '\n' || s.ch == '\r'
-}
-
-func (s *byteScanner) isSpace() bool {
- for _, r := range WhitespaceChars {
- if s.ch == r {
- return true
- }
- }
- return false
-}
-
-func (s *byteScanner) skipSpace() {
- for s.ch != -1 && s.isSpace() {
- s.nextChar()
- }
-}
-
-func (s *byteScanner) skipUntilEol() {
- for s.ch != -1 && !s.isEol() {
- s.nextChar()
- }
- // skip subsequent newline chars
- for s.ch != -1 && s.isEol() {
- s.nextChar()
- }
-}
-
-// Reads next Unicode char.
-func (s *byteScanner) nextChar() bool {
- if s.pos.Offset >= len(s.buf) {
- s.ch = -1
- return false
- }
- s.pos.Column++
- if s.ch == '\n' {
- s.pos.Line++
- s.pos.Column = 1
- }
- r, w := rune(s.buf[s.pos.Offset]), 1
- if r >= 0x80 {
- r, w = utf8.DecodeRune(s.buf[s.pos.Offset:])
- if r == utf8.RuneError && w == 1 {
- s.error(s.pos, "illegal UTF-8 encoding")
- }
- }
- s.pos.Column++
- s.pos.Offset += w
- s.ch = r
- return true
-}
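
For reference, a minimal usage sketch of the scanner removed above (not part of the diff). byteScanner and its methods are unexported, so code like this would have to live inside the robotstxt package itself, e.g. in a _test.go file; the function name exampleScan and the sample input are illustrative assumptions, while newByteScanner, feed, and scanAll are taken from the deleted file.

package robotstxt

import "fmt"

// exampleScan is a hypothetical helper showing how the deleted byteScanner
// was driven: feed a single (final) chunk, then pull all tokens.
func exampleScan() {
	sc := newByteScanner("robots.txt", true)                    // quiet=true: no warnings on stderr
	sc.feed([]byte("User-agent: *\nDisallow: /private\n"), true) // end=true: last chunk
	fmt.Printf("%q\n", sc.scanAll())
	// Expected output: ["User-agent" "*" "\n" "Disallow" "/private" "\n"]
	// Each line is split on its first ":" only (keyTokenFound), and every
	// end-of-line is emitted as an explicit "\n" token.
}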