Diffstat (limited to 'vendor/github.com/temoto/robotstxt')
-rw-r--r--  vendor/github.com/temoto/robotstxt/.gitignore      15
-rw-r--r--  vendor/github.com/temoto/robotstxt/.golangci.yml    20
-rw-r--r--  vendor/github.com/temoto/robotstxt/.travis.yml      30
-rw-r--r--  vendor/github.com/temoto/robotstxt/LICENSE          21
-rw-r--r--  vendor/github.com/temoto/robotstxt/README.rst      115
-rw-r--r--  vendor/github.com/temoto/robotstxt/codecov.yml       2
-rw-r--r--  vendor/github.com/temoto/robotstxt/fuzz.go          29
-rw-r--r--  vendor/github.com/temoto/robotstxt/parser.go        271
-rw-r--r--  vendor/github.com/temoto/robotstxt/robotstxt.go     227
-rw-r--r--  vendor/github.com/temoto/robotstxt/scanner.go       185
10 files changed, 0 insertions, 915 deletions
diff --git a/vendor/github.com/temoto/robotstxt/.gitignore b/vendor/github.com/temoto/robotstxt/.gitignore
deleted file mode 100644
index 6205f9eae..000000000
--- a/vendor/github.com/temoto/robotstxt/.gitignore
+++ /dev/null
@@ -1,15 +0,0 @@
-*.cgo?.*
-*.o
-*.so
-*.sublime-*
-*.zip
-.DS_Store
-.idea/
-.tags*
-_cgo_*
-_gofuzz/crashers/
-_gofuzz/suppressions/
-_obj
-_test
-coverage.txt
-robots.txt-check/robots.txt-check
diff --git a/vendor/github.com/temoto/robotstxt/.golangci.yml b/vendor/github.com/temoto/robotstxt/.golangci.yml
deleted file mode 100644
index 24e5858fa..000000000
--- a/vendor/github.com/temoto/robotstxt/.golangci.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-linters:
- enable:
- - goconst
- - gofmt
- - gosec
- - maligned
- - prealloc
- - staticcheck
- disable:
- - deadcode
- - structcheck
- - varcheck
-
-linters-settings:
- gofmt:
- simplify: true
- govet:
- check-shadowing: true
- maligned:
- suggest-new: true
diff --git a/vendor/github.com/temoto/robotstxt/.travis.yml b/vendor/github.com/temoto/robotstxt/.travis.yml
deleted file mode 100644
index ad90dac37..000000000
--- a/vendor/github.com/temoto/robotstxt/.travis.yml
+++ /dev/null
@@ -1,30 +0,0 @@
-cache:
- go: true
- directories:
- - $HOME/.cache
- - $HOME/bin
- - $HOME/gopath/pkg/mod
-language: go
-go:
-- 1.11
-- 1.12
-- 1.13
-- 1.14
-- 1.x
-- master
-install: true
-script: GO111MODULE=on go test -race
-
-matrix:
- include:
- - go: 1.x
- env: task=coverage
- script: GO111MODULE=on go test -race -covermode=atomic -coverprofile=coverage.txt
- after_success: bash <(curl -s https://codecov.io/bash)
- - go: 1.x
- env: task=bench
- script: GO111MODULE=on ./script/bench
- - go: 1.x
- install: curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | bash -s -- -b $HOME/bin v1.19.1
- env: task=clean
- script: GO111MODULE=on ./script/clean
diff --git a/vendor/github.com/temoto/robotstxt/LICENSE b/vendor/github.com/temoto/robotstxt/LICENSE
deleted file mode 100644
index c125145b6..000000000
--- a/vendor/github.com/temoto/robotstxt/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-The MIT License
-
-Copyright (c) 2010 Sergey Shepelev <temotor@gmail.com>
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
diff --git a/vendor/github.com/temoto/robotstxt/README.rst b/vendor/github.com/temoto/robotstxt/README.rst
deleted file mode 100644
index 92f1ae161..000000000
--- a/vendor/github.com/temoto/robotstxt/README.rst
+++ /dev/null
@@ -1,115 +0,0 @@
-What
-====
-
-This is a robots.txt exclusion protocol implementation for Go language (golang).
-
-
-Build
-=====
-
-To build and run tests, run `go test` in the source directory.
-
-
-Contribute
-==========
-
-Warm welcome.
-
-* If desired, add your name in README.rst, section Who.
-* Run `script/test && script/clean && echo ok`
-* You can ignore linter warnings, but everything else must pass.
-* Send your change as pull request or just a regular patch to current maintainer (see section Who).
-
-Thank you.
-
-
-Usage
-=====
-
-As usual, no special installation is required, just
-
- import "github.com/temoto/robotstxt"
-
-run `go get` and you're ready.
-
-1. Parse
-^^^^^^^^
-
-First of all, you need to parse the robots.txt data. You can do that with the
-function `FromBytes(body []byte) (*RobotsData, error)` or its `string` counterpart::
-
- robots, err := robotstxt.FromBytes([]byte("User-agent: *\nDisallow:"))
- robots, err := robotstxt.FromString("User-agent: *\nDisallow:")
-
-As of 2012-10-03, `FromBytes` is the most efficient method; everything else
-is a wrapper around this core function.
-
-There are a few convenient constructors for various purposes:
-
-* `FromResponse(*http.Response) (*RobotsData, error)` to initialize robots data
-from an HTTP response. It *does not* call `response.Body.Close()`::
-
- robots, err := robotstxt.FromResponse(resp)
- resp.Body.Close()
- if err != nil {
- log.Println("Error parsing robots.txt:", err.Error())
- }
-
-* `FromStatusAndBytes(statusCode int, body []byte) (*RobotsData, error)` or
-`FromStatusAndString` if you prefer to read the bytes (string) yourself.
-Passing the status code applies the following logic, in line with Google's
-interpretation of robots.txt files (see the sketch after the list):
-
- * status 2xx -> parse body with `FromBytes` and apply rules listed there.
- * status 4xx -> allow all (even 401/403, as recommended by Google).
- * other (5xx) -> disallow all, consider this a temporary unavailability.
-
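For illustration, a minimal Go sketch of feeding a fetched status code and body into `FromStatusAndBytes` (the URL is a placeholder):

    package main

    import (
        "io/ioutil"
        "log"
        "net/http"

        "github.com/temoto/robotstxt"
    )

    func main() {
        resp, err := http.Get("https://example.com/robots.txt") // placeholder URL
        if err != nil {
            log.Fatal(err)
        }
        defer resp.Body.Close()

        body, err := ioutil.ReadAll(resp.Body)
        if err != nil {
            log.Fatal(err)
        }

        // 2xx parses the body, 4xx becomes "allow all", 5xx becomes "disallow all".
        robots, err := robotstxt.FromStatusAndBytes(resp.StatusCode, body)
        if err != nil {
            log.Fatal(err)
        }
        log.Println("allowed:", robots.TestAgent("/secret/", "FooBot"))
    }
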
-2. Query
-^^^^^^^^
-
-Parsing robots.txt content builds a kind of logic database, which you can
-query with `(r *RobotsData) TestAgent(path, agent string) bool`.
-
-Passing the agent explicitly is useful if you want to query for different agents. For
-a single agent there is a more efficient option: `RobotsData.FindGroup(userAgent string)`
-returns a structure with a `.Test(path string)` method and a `.CrawlDelay time.Duration` field.
-
-A simple query with an explicit user agent. Each call will scan all rules.
-
-::
-
- allow := robots.TestAgent("/", "FooBot")
-
-Or query several paths against the same user agent for better performance.
-
-::
-
- group := robots.FindGroup("BarBot")
- group.Test("/")
- group.Test("/download.mp3")
- group.Test("/news/article-2012-1")
-
-
-Who
-===
-
-Honorable contributors (in undefined order):
-
- * Ilya Grigorik (igrigorik)
- * Martin Angers (PuerkitoBio)
- * Micha Gorelick (mynameisfiber)
-
-Initial commit and other: Sergey Shepelev temotor@gmail.com
-
-
-Flair
-=====
-
-.. image:: https://travis-ci.org/temoto/robotstxt.svg?branch=master
- :target: https://travis-ci.org/temoto/robotstxt
-
-.. image:: https://codecov.io/gh/temoto/robotstxt/branch/master/graph/badge.svg
- :target: https://codecov.io/gh/temoto/robotstxt
-
-.. image:: https://goreportcard.com/badge/github.com/temoto/robotstxt
- :target: https://goreportcard.com/report/github.com/temoto/robotstxt
diff --git a/vendor/github.com/temoto/robotstxt/codecov.yml b/vendor/github.com/temoto/robotstxt/codecov.yml
deleted file mode 100644
index b80be28f6..000000000
--- a/vendor/github.com/temoto/robotstxt/codecov.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-codecov:
- token: 6bf9c7eb-69ff-4b74-8464-e2fb452d0f04
diff --git a/vendor/github.com/temoto/robotstxt/fuzz.go b/vendor/github.com/temoto/robotstxt/fuzz.go
deleted file mode 100644
index de4b0587a..000000000
--- a/vendor/github.com/temoto/robotstxt/fuzz.go
+++ /dev/null
@@ -1,29 +0,0 @@
-// +build gofuzz
-
-package robotstxt
-
-import "testing/quick"
-
-func Fuzz(data []byte) int {
- r, err := FromBytes(data)
- if err != nil {
- if r != nil {
- panic("r != nil on error")
- }
- return 0
- }
-
- // FindGroup must never return nil
- f1 := func(agent string) bool { return r.FindGroup(agent) != nil }
- if err := quick.Check(f1, nil); err != nil {
- panic(err)
- }
-
- // just check TestAgent doesn't panic
- f2 := func(path, agent string) bool { r.TestAgent(path, agent); return true }
- if err := quick.Check(f2, nil); err != nil {
- panic(err)
- }
-
- return 1
-}
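The fuzz target above relies on `FromBytes` returning a nil `*RobotsData` whenever it returns an error. A minimal sketch of that invariant through the public API, using a made-up invalid input:

    package main

    import (
        "fmt"

        "github.com/temoto/robotstxt"
    )

    func main() {
        // An invalid Crawl-delay value forces a parse error; on error FromBytes
        // returns a nil *RobotsData, which is what the fuzz target asserts.
        r, err := robotstxt.FromBytes([]byte("User-agent: *\nCrawl-delay: oops"))
        fmt.Println(r == nil, err != nil) // true true
    }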
diff --git a/vendor/github.com/temoto/robotstxt/parser.go b/vendor/github.com/temoto/robotstxt/parser.go
deleted file mode 100644
index 46eb6b184..000000000
--- a/vendor/github.com/temoto/robotstxt/parser.go
+++ /dev/null
@@ -1,271 +0,0 @@
-package robotstxt
-
-// Comments explaining the logic are taken from either Google's spec:
-// https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt
-//
-// or Wikipedia's entry on robots.txt:
-// http://en.wikipedia.org/wiki/Robots.txt
-
-import (
- "fmt"
- "io"
- "math"
- "regexp"
- "strconv"
- "strings"
- "time"
-)
-
-type lineType uint
-
-const (
- lIgnore lineType = iota
- lUnknown
- lUserAgent
- lAllow
- lDisallow
- lCrawlDelay
- lSitemap
- lHost
-)
-
-type parser struct {
- tokens []string
- pos int
-}
-
-type lineInfo struct {
- t lineType // Type of line key
- k string // String representation of the type of key
- vs string // String value of the key
- vf float64 // Float value of the key
- vr *regexp.Regexp // Regexp value of the key
-}
-
-func newParser(tokens []string) *parser {
- return &parser{tokens: tokens}
-}
-
-func parseGroupMap(groups map[string]*Group, agents []string, fun func(*Group)) {
- var g *Group
- for _, a := range agents {
- if g = groups[a]; g == nil {
- g = new(Group)
- groups[a] = g
- }
- fun(g)
- }
-}
-
-func (p *parser) parseAll() (groups map[string]*Group, host string, sitemaps []string, errs []error) {
- groups = make(map[string]*Group, 16)
- agents := make([]string, 0, 4)
- isEmptyGroup := true
-
- // Reset internal fields, tokens are assigned at creation time, never change
- p.pos = 0
-
- for {
- if li, err := p.parseLine(); err != nil {
- if err == io.EOF {
- break
- }
- errs = append(errs, err)
- } else {
- switch li.t {
- case lUserAgent:
- // Two successive user-agent lines are part of the same group.
- if !isEmptyGroup {
- // End previous group
- agents = make([]string, 0, 4)
- }
- if len(agents) == 0 {
- isEmptyGroup = true
- }
- agents = append(agents, li.vs)
-
- case lDisallow:
- // Error if no current group
- if len(agents) == 0 {
- errs = append(errs, fmt.Errorf("Disallow before User-agent at token #%d.", p.pos))
- } else {
- isEmptyGroup = false
- var r *rule
- if li.vr != nil {
- r = &rule{"", false, li.vr}
- } else {
- r = &rule{li.vs, false, nil}
- }
- parseGroupMap(groups, agents, func(g *Group) { g.rules = append(g.rules, r) })
- }
-
- case lAllow:
- // Error if no current group
- if len(agents) == 0 {
- errs = append(errs, fmt.Errorf("Allow before User-agent at token #%d.", p.pos))
- } else {
- isEmptyGroup = false
- var r *rule
- if li.vr != nil {
- r = &rule{"", true, li.vr}
- } else {
- r = &rule{li.vs, true, nil}
- }
- parseGroupMap(groups, agents, func(g *Group) { g.rules = append(g.rules, r) })
- }
-
- case lHost:
- host = li.vs
-
- case lSitemap:
- sitemaps = append(sitemaps, li.vs)
-
- case lCrawlDelay:
- if len(agents) == 0 {
- errs = append(errs, fmt.Errorf("Crawl-delay before User-agent at token #%d.", p.pos))
- } else {
- isEmptyGroup = false
- delay := time.Duration(li.vf * float64(time.Second))
- parseGroupMap(groups, agents, func(g *Group) { g.CrawlDelay = delay })
- }
- }
- }
- }
- return
-}
-
-func (p *parser) parseLine() (li *lineInfo, err error) {
- t1, ok1 := p.popToken()
- if !ok1 {
- // proper EOF
- return nil, io.EOF
- }
-
- t2, ok2 := p.peekToken()
- if !ok2 {
- // EOF, no value associated with the token, so ignore token and return
- return nil, io.EOF
- }
-
- // Helper closure for all string-based tokens, common behaviour:
- // - Consume t2 token
- // - If empty, return unknown line info
- // - Otherwise return the specified line info
- returnStringVal := func(t lineType) (*lineInfo, error) {
- p.popToken()
- if t2 != "" {
- return &lineInfo{t: t, k: t1, vs: t2}, nil
- }
- return &lineInfo{t: lIgnore}, nil
- }
-
- // Helper closure for all path tokens (allow/disallow), common behaviour:
- // - Consume t2 token
- // - If empty, return unknown line info
- // - Otherwise, normalize the path (add leading "/" if missing, remove trailing "*")
- // - Detect if wildcards are present, if so, compile into a regexp
- // - Return the specified line info
- returnPathVal := func(t lineType) (*lineInfo, error) {
- p.popToken()
- if t2 != "" {
- if !strings.HasPrefix(t2, "*") && !strings.HasPrefix(t2, "/") {
- t2 = "/" + t2
- }
- t2 = strings.TrimRightFunc(t2, isAsterisk)
- // From google's spec:
- // Google, Bing, Yahoo, and Ask support a limited form of
- // "wildcards" for path values. These are:
- // * designates 0 or more instances of any valid character
- // $ designates the end of the URL
- if strings.ContainsAny(t2, "*$") {
- // Must compile a regexp, this is a pattern.
- // Escape string before compile.
- t2 = regexp.QuoteMeta(t2)
- t2 = strings.Replace(t2, `\*`, `.*`, -1)
- t2 = strings.Replace(t2, `\$`, `$`, -1)
- if r, e := regexp.Compile(t2); e != nil {
- return nil, e
- } else {
- return &lineInfo{t: t, k: t1, vr: r}, nil
- }
- } else {
- // Simple string path
- return &lineInfo{t: t, k: t1, vs: t2}, nil
- }
- }
- return &lineInfo{t: lIgnore}, nil
- }
-
- switch strings.ToLower(t1) {
- case tokEOL:
- // Don't consume t2 and continue parsing
- return &lineInfo{t: lIgnore}, nil
-
- case "user-agent", "useragent":
- // From google's spec:
- // Handling of <field> elements with simple errors / typos (eg "useragent"
- // instead of "user-agent") is undefined and may be interpreted as correct
- // directives by some user-agents.
- // The user-agent is non-case-sensitive.
- t2 = strings.ToLower(t2)
- return returnStringVal(lUserAgent)
-
- case "disallow":
- // From google's spec:
- // When no path is specified, the directive is ignored (so an empty Disallow
- // CAN be an allow, since allow is the default. The actual result depends
- // on the other rules in the group).
- return returnPathVal(lDisallow)
-
- case "allow":
- // From google's spec:
- // When no path is specified, the directive is ignored.
- return returnPathVal(lAllow)
-
- case "host":
- // Host directive to specify main site mirror
- // Read more: https://help.yandex.com/webmaster/controlling-robot/robots-txt.xml#host
- return returnStringVal(lHost)
-
- case "sitemap":
- // Non-group field, applies to the host as a whole, not to a specific user-agent
- return returnStringVal(lSitemap)
-
- case "crawl-delay", "crawldelay":
- // From http://en.wikipedia.org/wiki/Robots_exclusion_standard#Nonstandard_extensions
- // Several major crawlers support a Crawl-delay parameter, set to the
- // number of seconds to wait between successive requests to the same server.
- p.popToken()
- if cd, e := strconv.ParseFloat(t2, 64); e != nil {
- return nil, e
- } else if cd < 0 || math.IsInf(cd, 0) || math.IsNaN(cd) {
- return nil, fmt.Errorf("Crawl-delay invalid value '%s'", t2)
- } else {
- return &lineInfo{t: lCrawlDelay, k: t1, vf: cd}, nil
- }
- }
-
- // Consume t2 token
- p.popToken()
- return &lineInfo{t: lUnknown, k: t1}, nil
-}
-
-func (p *parser) popToken() (tok string, ok bool) {
- tok, ok = p.peekToken()
- if !ok {
- return
- }
- p.pos++
- return tok, true
-}
-
-func (p *parser) peekToken() (tok string, ok bool) {
- if p.pos >= len(p.tokens) {
- return "", false
- }
- return p.tokens[p.pos], true
-}
-
-func isAsterisk(r rune) bool {
- return r == '*'
-}
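A minimal sketch of the grouping and wildcard handling implemented by the parser above, exercised through the package's public API (the agents and paths are made up):

    package main

    import (
        "fmt"
        "log"

        "github.com/temoto/robotstxt"
    )

    func main() {
        // Two successive User-agent lines form a single group, so FooBot and
        // BarBot share the Disallow rules below; "*" and "$" in a path are
        // compiled into a regexp by the parser.
        const body = "User-agent: FooBot\n" +
            "User-agent: BarBot\n" +
            "Disallow: /private/\n" +
            "Disallow: /*.gif$\n"

        robots, err := robotstxt.FromString(body)
        if err != nil {
            log.Fatal(err)
        }
        for _, agent := range []string{"FooBot", "BarBot"} {
            g := robots.FindGroup(agent)
            fmt.Println(agent, g.Test("/private/x"), g.Test("/images/cat.gif"), g.Test("/index.html"))
            // Both agents print: false false true
        }
    }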
diff --git a/vendor/github.com/temoto/robotstxt/robotstxt.go b/vendor/github.com/temoto/robotstxt/robotstxt.go
deleted file mode 100644
index 52d3637c6..000000000
--- a/vendor/github.com/temoto/robotstxt/robotstxt.go
+++ /dev/null
@@ -1,227 +0,0 @@
-// Package robotstxt implements the robots.txt Exclusion Protocol
-// as specified in http://www.robotstxt.org/wc/robots.html
-// with various extensions.
-package robotstxt
-
-// Comments explaining the logic are taken from Google's spec:
-// https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt
-
-import (
- "bytes"
- "errors"
- "io/ioutil"
- "net/http"
- "regexp"
- "strconv"
- "strings"
- "time"
-)
-
-type RobotsData struct {
- // private
- groups map[string]*Group
- allowAll bool
- disallowAll bool
- Host string
- Sitemaps []string
-}
-
-type Group struct {
- rules []*rule
- Agent string
- CrawlDelay time.Duration
-}
-
-type rule struct {
- path string
- allow bool
- pattern *regexp.Regexp
-}
-
-type ParseError struct {
- Errs []error
-}
-
-func newParseError(errs []error) *ParseError {
- return &ParseError{errs}
-}
-
-func (e ParseError) Error() string {
- var b bytes.Buffer
-
- b.WriteString("Parse error(s): " + "\n")
- for _, er := range e.Errs {
- b.WriteString(er.Error() + "\n")
- }
- return b.String()
-}
-
-var allowAll = &RobotsData{allowAll: true}
-var disallowAll = &RobotsData{disallowAll: true}
-var emptyGroup = &Group{}
-
-func FromStatusAndBytes(statusCode int, body []byte) (*RobotsData, error) {
- switch {
- case statusCode >= 200 && statusCode < 300:
- return FromBytes(body)
-
- // From https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt
- //
- // Google treats all 4xx errors in the same way and assumes that no valid
- // robots.txt file exists. It is assumed that there are no restrictions.
- // This is a "full allow" for crawling. Note: this includes 401
- // "Unauthorized" and 403 "Forbidden" HTTP result codes.
- case statusCode >= 400 && statusCode < 500:
- return allowAll, nil
-
- // From Google's spec:
- // Server errors (5xx) are seen as temporary errors that result in a "full
- // disallow" of crawling.
- case statusCode >= 500 && statusCode < 600:
- return disallowAll, nil
- }
-
- return nil, errors.New("Unexpected status: " + strconv.Itoa(statusCode))
-}
-
-func FromStatusAndString(statusCode int, body string) (*RobotsData, error) {
- return FromStatusAndBytes(statusCode, []byte(body))
-}
-
-func FromResponse(res *http.Response) (*RobotsData, error) {
- if res == nil {
- // Edge case, if res is nil, return nil data
- return nil, nil
- }
- buf, e := ioutil.ReadAll(res.Body)
- if e != nil {
- return nil, e
- }
- return FromStatusAndBytes(res.StatusCode, buf)
-}
-
-func FromBytes(body []byte) (r *RobotsData, err error) {
- var errs []error
-
- // special case (probably not worth optimization?)
- trimmed := bytes.TrimSpace(body)
- if len(trimmed) == 0 {
- return allowAll, nil
- }
-
- sc := newByteScanner("bytes", true)
- //sc.Quiet = !print_errors
- sc.feed(body, true)
- tokens := sc.scanAll()
-
- // special case worth optimization
- if len(tokens) == 0 {
- return allowAll, nil
- }
-
- r = &RobotsData{}
- parser := newParser(tokens)
- r.groups, r.Host, r.Sitemaps, errs = parser.parseAll()
- if len(errs) > 0 {
- return nil, newParseError(errs)
- }
-
- return r, nil
-}
-
-func FromString(body string) (r *RobotsData, err error) {
- return FromBytes([]byte(body))
-}
-
-func (r *RobotsData) TestAgent(path, agent string) bool {
- if r.allowAll {
- return true
- }
- if r.disallowAll {
- return false
- }
-
- // Find a group of rules that applies to this agent
- // From Google's spec:
- // The user-agent is non-case-sensitive.
- g := r.FindGroup(agent)
- return g.Test(path)
-}
-
-// FindGroup searches block of declarations for specified user-agent.
-// From Google's spec:
-// Only one group of group-member records is valid for a particular crawler.
-// The crawler must determine the correct group of records by finding the group
-// with the most specific user-agent that still matches. All other groups of
-// records are ignored by the crawler. The user-agent is non-case-sensitive.
-// The order of the groups within the robots.txt file is irrelevant.
-func (r *RobotsData) FindGroup(agent string) (ret *Group) {
- var prefixLen int
-
- agent = strings.ToLower(agent)
- if ret = r.groups["*"]; ret != nil {
- // Weakest match possible
- prefixLen = 1
- }
- for a, g := range r.groups {
- if a != "*" && strings.HasPrefix(agent, a) {
- if l := len(a); l > prefixLen {
- prefixLen = l
- ret = g
- }
- }
- }
-
- if ret == nil {
- return emptyGroup
- }
- return
-}
-
-func (g *Group) Test(path string) bool {
- if r := g.findRule(path); r != nil {
- return r.allow
- }
-
- // From Google's spec:
- // By default, there are no restrictions for crawling for the designated crawlers.
- return true
-}
-
-// From Google's spec:
-// The path value is used as a basis to determine whether or not a rule applies
-// to a specific URL on a site. With the exception of wildcards, the path is
-// used to match the beginning of a URL (and any valid URLs that start with the
-// same path).
-//
-// At a group-member level, in particular for allow and disallow directives,
-// the most specific rule based on the length of the [path] entry will trump
-// the less specific (shorter) rule. The order of precedence for rules with
-// wildcards is undefined.
-func (g *Group) findRule(path string) (ret *rule) {
- var prefixLen int
-
- for _, r := range g.rules {
- if r.pattern != nil {
- if r.pattern.MatchString(path) {
- // Consider this a match equal to the length of the pattern.
- // From Google's spec:
- // The order of precedence for rules with wildcards is undefined.
- if l := len(r.pattern.String()); l > prefixLen {
- prefixLen = l
- ret = r
- }
- }
- } else if r.path == "/" && prefixLen == 0 {
- // Weakest match possible
- prefixLen = 1
- ret = r
- } else if strings.HasPrefix(path, r.path) {
- if l := len(r.path); l > prefixLen {
- prefixLen = l
- ret = r
- }
- }
- }
- return
-}
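A minimal sketch of the agent- and path-precedence rules documented in FindGroup and findRule above (the robots.txt content is made up):

    package main

    import (
        "fmt"
        "log"

        "github.com/temoto/robotstxt"
    )

    func main() {
        // FindGroup picks the group whose user-agent is the longest prefix of the
        // queried agent (falling back to "*"); within a group, the longest
        // matching path wins, so the more specific Allow overrides Disallow.
        const body = "User-agent: *\n" +
            "Disallow: /\n" +
            "\n" +
            "User-agent: Googlebot\n" +
            "Disallow: /private/\n" +
            "Allow: /private/public.html\n"

        robots, err := robotstxt.FromString(body)
        if err != nil {
            log.Fatal(err)
        }

        g := robots.FindGroup("Googlebot-Image")             // matched by the "googlebot" group
        fmt.Println(g.Test("/private/secret.html"))          // false
        fmt.Println(g.Test("/private/public.html"))          // true: longer Allow path wins
        fmt.Println(robots.FindGroup("OtherBot").Test("/"))  // false: "*" group disallows everything
    }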
diff --git a/vendor/github.com/temoto/robotstxt/scanner.go b/vendor/github.com/temoto/robotstxt/scanner.go
deleted file mode 100644
index 6bd98c2ec..000000000
--- a/vendor/github.com/temoto/robotstxt/scanner.go
+++ /dev/null
@@ -1,185 +0,0 @@
-package robotstxt
-
-import (
- "bytes"
- "fmt"
- "go/token"
- "os"
- "sync"
- "unicode/utf8"
-)
-
-type byteScanner struct {
- pos token.Position
- buf []byte
- ErrorCount int
- ch rune
- Quiet bool
- keyTokenFound bool
- lastChunk bool
-}
-
-const tokEOL = "\n"
-
-var WhitespaceChars = []rune{' ', '\t', '\v'}
-var tokBuffers = sync.Pool{New: func() interface{} { return bytes.NewBuffer(make([]byte, 32)) }}
-
-func newByteScanner(srcname string, quiet bool) *byteScanner {
- return &byteScanner{
- Quiet: quiet,
- ch: -1,
- pos: token.Position{Filename: srcname},
- }
-}
-
-func (s *byteScanner) feed(input []byte, end bool) {
- s.buf = input
- s.pos.Offset = 0
- s.pos.Line = 1
- s.pos.Column = 1
- s.lastChunk = end
-
- // Read first char into look-ahead buffer `s.ch`.
- if !s.nextChar() {
- return
- }
-
- // Skip UTF-8 byte order mark
- if s.ch == 65279 {
- s.nextChar()
- s.pos.Column = 1
- }
-}
-
-func (s *byteScanner) GetPosition() token.Position {
- return s.pos
-}
-
-func (s *byteScanner) scan() string {
- // Note Offset > len, not >=, so we can scan last character.
- if s.lastChunk && s.pos.Offset > len(s.buf) {
- return ""
- }
-
- s.skipSpace()
-
- if s.ch == -1 {
- return ""
- }
-
- // EOL
- if s.isEol() {
- s.keyTokenFound = false
- // skip subsequent newline chars
- for s.ch != -1 && s.isEol() {
- s.nextChar()
- }
- // emit newline as separate token
- return tokEOL
- }
-
- // skip comments
- if s.ch == '#' {
- s.keyTokenFound = false
- s.skipUntilEol()
- if s.ch == -1 {
- return ""
- }
- // emit newline as separate token
- return tokEOL
- }
-
- // else we found something
- tok := tokBuffers.Get().(*bytes.Buffer)
- defer tokBuffers.Put(tok)
- tok.Reset()
- tok.WriteRune(s.ch)
- s.nextChar()
- for s.ch != -1 && !s.isSpace() && !s.isEol() {
- // Do not consider ":" to be a token separator if a first key token
- // has already been found on this line (avoid cutting an absolute URL
- // after the "http:")
- if s.ch == ':' && !s.keyTokenFound {
- s.nextChar()
- s.keyTokenFound = true
- break
- }
-
- tok.WriteRune(s.ch)
- s.nextChar()
- }
- return tok.String()
-}
-
-func (s *byteScanner) scanAll() []string {
- results := make([]string, 0, 64) // random guess of average tokens length
- for {
- token := s.scan()
- if token != "" {
- results = append(results, token)
- } else {
- break
- }
- }
- return results
-}
-
-func (s *byteScanner) error(pos token.Position, msg string) {
- s.ErrorCount++
- if !s.Quiet {
- fmt.Fprintf(os.Stderr, "robotstxt from %s: %s\n", pos.String(), msg)
- }
-}
-
-func (s *byteScanner) isEol() bool {
- return s.ch == '\n' || s.ch == '\r'
-}
-
-func (s *byteScanner) isSpace() bool {
- for _, r := range WhitespaceChars {
- if s.ch == r {
- return true
- }
- }
- return false
-}
-
-func (s *byteScanner) skipSpace() {
- for s.ch != -1 && s.isSpace() {
- s.nextChar()
- }
-}
-
-func (s *byteScanner) skipUntilEol() {
- for s.ch != -1 && !s.isEol() {
- s.nextChar()
- }
- // skip subsequent newline chars
- for s.ch != -1 && s.isEol() {
- s.nextChar()
- }
-}
-
-// Reads next Unicode char.
-func (s *byteScanner) nextChar() bool {
- if s.pos.Offset >= len(s.buf) {
- s.ch = -1
- return false
- }
- s.pos.Column++
- if s.ch == '\n' {
- s.pos.Line++
- s.pos.Column = 1
- }
- r, w := rune(s.buf[s.pos.Offset]), 1
- if r >= 0x80 {
- r, w = utf8.DecodeRune(s.buf[s.pos.Offset:])
- if r == utf8.RuneError && w == 1 {
- s.error(s.pos, "illegal UTF-8 encoding")
- }
- }
- s.pos.Column++
- s.pos.Offset += w
- s.ch = r
- return true
-}
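A minimal sketch of the scanner's colon handling described above, observed through the public API (the sitemap URL is a placeholder):

    package main

    import (
        "fmt"

        "github.com/temoto/robotstxt"
    )

    func main() {
        // The scanner treats ":" as a key/value separator only for the first
        // token on a line, so the "http:" inside the sitemap URL is not split.
        robots, err := robotstxt.FromString("Sitemap: http://example.com/sitemap.xml\n")
        if err != nil {
            fmt.Println("parse error:", err)
            return
        }
        fmt.Println(robots.Sitemaps)                     // [http://example.com/sitemap.xml]
        fmt.Println(robots.TestAgent("/page", "FooBot")) // true: no groups, so no restrictions
    }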