From b1af8fd87760b34e3ff2fd3bda38f211815a0473 Mon Sep 17 00:00:00 2001 From: Terin Stock Date: Sun, 9 Mar 2025 17:47:56 +0100 Subject: [chore] remove vendor --- vendor/github.com/grafana/regexp/regexp.go | 1304 ---------------------------- 1 file changed, 1304 deletions(-) delete mode 100644 vendor/github.com/grafana/regexp/regexp.go (limited to 'vendor/github.com/grafana/regexp/regexp.go') diff --git a/vendor/github.com/grafana/regexp/regexp.go b/vendor/github.com/grafana/regexp/regexp.go deleted file mode 100644 index d1218ad0e..000000000 --- a/vendor/github.com/grafana/regexp/regexp.go +++ /dev/null @@ -1,1304 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package regexp implements regular expression search. -// -// The syntax of the regular expressions accepted is the same -// general syntax used by Perl, Python, and other languages. -// More precisely, it is the syntax accepted by RE2 and described at -// https://golang.org/s/re2syntax, except for \C. -// For an overview of the syntax, see the [regexp/syntax] package. -// -// The regexp implementation provided by this package is -// guaranteed to run in time linear in the size of the input. -// (This is a property not guaranteed by most open source -// implementations of regular expressions.) For more information -// about this property, see -// -// https://swtch.com/~rsc/regexp/regexp1.html -// -// or any book about automata theory. -// -// All characters are UTF-8-encoded code points. -// Following [utf8.DecodeRune], each byte of an invalid UTF-8 sequence -// is treated as if it encoded utf8.RuneError (U+FFFD). -// -// There are 16 methods of [Regexp] that match a regular expression and identify -// the matched text. Their names are matched by this regular expression: -// -// Find(All)?(String)?(Submatch)?(Index)? -// -// If 'All' is present, the routine matches successive non-overlapping -// matches of the entire expression. Empty matches abutting a preceding -// match are ignored. The return value is a slice containing the successive -// return values of the corresponding non-'All' routine. These routines take -// an extra integer argument, n. If n >= 0, the function returns at most n -// matches/submatches; otherwise, it returns all of them. -// -// If 'String' is present, the argument is a string; otherwise it is a slice -// of bytes; return values are adjusted as appropriate. -// -// If 'Submatch' is present, the return value is a slice identifying the -// successive submatches of the expression. Submatches are matches of -// parenthesized subexpressions (also known as capturing groups) within the -// regular expression, numbered from left to right in order of opening -// parenthesis. Submatch 0 is the match of the entire expression, submatch 1 is -// the match of the first parenthesized subexpression, and so on. -// -// If 'Index' is present, matches and submatches are identified by byte index -// pairs within the input string: result[2*n:2*n+2] identifies the indexes of -// the nth submatch. The pair for n==0 identifies the match of the entire -// expression. If 'Index' is not present, the match is identified by the text -// of the match/submatch. If an index is negative or text is nil, it means that -// subexpression did not match any string in the input. For 'String' versions -// an empty string means either no match or an empty match. -// -// There is also a subset of the methods that can be applied to text read -// from a RuneReader: -// -// MatchReader, FindReaderIndex, FindReaderSubmatchIndex -// -// This set may grow. Note that regular expression matches may need to -// examine text beyond the text returned by a match, so the methods that -// match text from a RuneReader may read arbitrarily far into the input -// before returning. -// -// (There are a few other methods that do not match this pattern.) -package regexp - -import ( - "bytes" - "io" - "regexp/syntax" - "strconv" - "strings" - "sync" - "unicode" - "unicode/utf8" -) - -// Regexp is the representation of a compiled regular expression. -// A Regexp is safe for concurrent use by multiple goroutines, -// except for configuration methods, such as [Regexp.Longest]. -type Regexp struct { - expr string // as passed to Compile - prog *syntax.Prog // compiled program - onepass *onePassProg // onepass program or nil - numSubexp int - maxBitStateLen int - subexpNames []string - prefix string // required prefix in unanchored matches - prefixBytes []byte // prefix, as a []byte - prefixRune rune // first rune in prefix - prefixEnd uint32 // pc for last rune in prefix - mpool int // pool for machines - matchcap int // size of recorded match lengths - prefixComplete bool // prefix is the entire regexp - cond syntax.EmptyOp // empty-width conditions required at start of match - minInputLen int // minimum length of the input in bytes - - // This field can be modified by the Longest method, - // but it is otherwise read-only. - longest bool // whether regexp prefers leftmost-longest match -} - -// String returns the source text used to compile the regular expression. -func (re *Regexp) String() string { - return re.expr -} - -// Copy returns a new [Regexp] object copied from re. -// Calling [Regexp.Longest] on one copy does not affect another. -// -// Deprecated: In earlier releases, when using a [Regexp] in multiple goroutines, -// giving each goroutine its own copy helped to avoid lock contention. -// As of Go 1.12, using Copy is no longer necessary to avoid lock contention. -// Copy may still be appropriate if the reason for its use is to make -// two copies with different [Regexp.Longest] settings. -func (re *Regexp) Copy() *Regexp { - re2 := *re - return &re2 -} - -// Compile parses a regular expression and returns, if successful, -// a [Regexp] object that can be used to match against text. -// -// When matching against text, the regexp returns a match that -// begins as early as possible in the input (leftmost), and among those -// it chooses the one that a backtracking search would have found first. -// This so-called leftmost-first matching is the same semantics -// that Perl, Python, and other implementations use, although this -// package implements it without the expense of backtracking. -// For POSIX leftmost-longest matching, see [CompilePOSIX]. -func Compile(expr string) (*Regexp, error) { - return compile(expr, syntax.Perl, false) -} - -// CompilePOSIX is like [Compile] but restricts the regular expression -// to POSIX ERE (egrep) syntax and changes the match semantics to -// leftmost-longest. -// -// That is, when matching against text, the regexp returns a match that -// begins as early as possible in the input (leftmost), and among those -// it chooses a match that is as long as possible. -// This so-called leftmost-longest matching is the same semantics -// that early regular expression implementations used and that POSIX -// specifies. -// -// However, there can be multiple leftmost-longest matches, with different -// submatch choices, and here this package diverges from POSIX. -// Among the possible leftmost-longest matches, this package chooses -// the one that a backtracking search would have found first, while POSIX -// specifies that the match be chosen to maximize the length of the first -// subexpression, then the second, and so on from left to right. -// The POSIX rule is computationally prohibitive and not even well-defined. -// See https://swtch.com/~rsc/regexp/regexp2.html#posix for details. -func CompilePOSIX(expr string) (*Regexp, error) { - return compile(expr, syntax.POSIX, true) -} - -// Longest makes future searches prefer the leftmost-longest match. -// That is, when matching against text, the regexp returns a match that -// begins as early as possible in the input (leftmost), and among those -// it chooses a match that is as long as possible. -// This method modifies the [Regexp] and may not be called concurrently -// with any other methods. -func (re *Regexp) Longest() { - re.longest = true -} - -func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) { - re, err := syntax.Parse(expr, mode) - if err != nil { - return nil, err - } - maxCap := re.MaxCap() - capNames := re.CapNames() - - re = re.Simplify() - prog, err := syntax.Compile(re) - if err != nil { - return nil, err - } - matchcap := prog.NumCap - if matchcap < 2 { - matchcap = 2 - } - regexp := &Regexp{ - expr: expr, - prog: prog, - onepass: compileOnePass(prog), - numSubexp: maxCap, - subexpNames: capNames, - cond: prog.StartCond(), - longest: longest, - matchcap: matchcap, - minInputLen: minInputLen(re), - } - if regexp.onepass == nil { - regexp.prefix, regexp.prefixComplete = prog.Prefix() - regexp.maxBitStateLen = maxBitStateLen(prog) - } else { - regexp.prefix, regexp.prefixComplete, regexp.prefixEnd = onePassPrefix(prog) - } - if regexp.prefix != "" { - // TODO(rsc): Remove this allocation by adding - // IndexString to package bytes. - regexp.prefixBytes = []byte(regexp.prefix) - regexp.prefixRune, _ = utf8.DecodeRuneInString(regexp.prefix) - } - - n := len(prog.Inst) - i := 0 - for matchSize[i] != 0 && matchSize[i] < n { - i++ - } - regexp.mpool = i - - return regexp, nil -} - -// Pools of *machine for use during (*Regexp).doExecute, -// split up by the size of the execution queues. -// matchPool[i] machines have queue size matchSize[i]. -// On a 64-bit system each queue entry is 16 bytes, -// so matchPool[0] has 16*2*128 = 4kB queues, etc. -// The final matchPool is a catch-all for very large queues. -var ( - matchSize = [...]int{128, 512, 2048, 16384, 0} - matchPool [len(matchSize)]sync.Pool -) - -// get returns a machine to use for matching re. -// It uses the re's machine cache if possible, to avoid -// unnecessary allocation. -func (re *Regexp) get() *machine { - m, ok := matchPool[re.mpool].Get().(*machine) - if !ok { - m = new(machine) - } - m.re = re - m.p = re.prog - if cap(m.matchcap) < re.matchcap { - m.matchcap = make([]int, re.matchcap) - for _, t := range m.pool { - t.cap = make([]int, re.matchcap) - } - } - - // Allocate queues if needed. - // Or reallocate, for "large" match pool. - n := matchSize[re.mpool] - if n == 0 { // large pool - n = len(re.prog.Inst) - } - if len(m.q0.sparse) < n { - m.q0 = queue{make([]uint32, n), make([]entry, 0, n)} - m.q1 = queue{make([]uint32, n), make([]entry, 0, n)} - } - return m -} - -// put returns a machine to the correct machine pool. -func (re *Regexp) put(m *machine) { - m.re = nil - m.p = nil - m.inputs.clear() - matchPool[re.mpool].Put(m) -} - -// minInputLen walks the regexp to find the minimum length of any matchable input. -func minInputLen(re *syntax.Regexp) int { - switch re.Op { - default: - return 0 - case syntax.OpAnyChar, syntax.OpAnyCharNotNL, syntax.OpCharClass: - return 1 - case syntax.OpLiteral: - l := 0 - for _, r := range re.Rune { - if r == utf8.RuneError { - l++ - } else { - l += utf8.RuneLen(r) - } - } - return l - case syntax.OpCapture, syntax.OpPlus: - return minInputLen(re.Sub[0]) - case syntax.OpRepeat: - return re.Min * minInputLen(re.Sub[0]) - case syntax.OpConcat: - l := 0 - for _, sub := range re.Sub { - l += minInputLen(sub) - } - return l - case syntax.OpAlternate: - l := minInputLen(re.Sub[0]) - var lnext int - for _, sub := range re.Sub[1:] { - lnext = minInputLen(sub) - if lnext < l { - l = lnext - } - } - return l - } -} - -// MustCompile is like [Compile] but panics if the expression cannot be parsed. -// It simplifies safe initialization of global variables holding compiled regular -// expressions. -func MustCompile(str string) *Regexp { - regexp, err := Compile(str) - if err != nil { - panic(`regexp: Compile(` + quote(str) + `): ` + err.Error()) - } - return regexp -} - -// MustCompilePOSIX is like [CompilePOSIX] but panics if the expression cannot be parsed. -// It simplifies safe initialization of global variables holding compiled regular -// expressions. -func MustCompilePOSIX(str string) *Regexp { - regexp, err := CompilePOSIX(str) - if err != nil { - panic(`regexp: CompilePOSIX(` + quote(str) + `): ` + err.Error()) - } - return regexp -} - -func quote(s string) string { - if strconv.CanBackquote(s) { - return "`" + s + "`" - } - return strconv.Quote(s) -} - -// NumSubexp returns the number of parenthesized subexpressions in this [Regexp]. -func (re *Regexp) NumSubexp() int { - return re.numSubexp -} - -// SubexpNames returns the names of the parenthesized subexpressions -// in this [Regexp]. The name for the first sub-expression is names[1], -// so that if m is a match slice, the name for m[i] is SubexpNames()[i]. -// Since the Regexp as a whole cannot be named, names[0] is always -// the empty string. The slice should not be modified. -func (re *Regexp) SubexpNames() []string { - return re.subexpNames -} - -// SubexpIndex returns the index of the first subexpression with the given name, -// or -1 if there is no subexpression with that name. -// -// Note that multiple subexpressions can be written using the same name, as in -// (?Pa+)(?Pb+), which declares two subexpressions named "bob". -// In this case, SubexpIndex returns the index of the leftmost such subexpression -// in the regular expression. -func (re *Regexp) SubexpIndex(name string) int { - if name != "" { - for i, s := range re.subexpNames { - if name == s { - return i - } - } - } - return -1 -} - -const endOfText rune = -1 - -// input abstracts different representations of the input text. It provides -// one-character lookahead. -type input interface { - step(pos int) (r rune, width int) // advance one rune - canCheckPrefix() bool // can we look ahead without losing info? - hasPrefix(re *Regexp) bool - index(re *Regexp, pos int) int - context(pos int) lazyFlag -} - -// inputString scans a string. -type inputString struct { - str string -} - -func (i *inputString) step(pos int) (rune, int) { - if pos < len(i.str) { - c := i.str[pos] - if c < utf8.RuneSelf { - return rune(c), 1 - } - return utf8.DecodeRuneInString(i.str[pos:]) - } - return endOfText, 0 -} - -func (i *inputString) canCheckPrefix() bool { - return true -} - -func (i *inputString) hasPrefix(re *Regexp) bool { - return strings.HasPrefix(i.str, re.prefix) -} - -func (i *inputString) index(re *Regexp, pos int) int { - return strings.Index(i.str[pos:], re.prefix) -} - -func (i *inputString) context(pos int) lazyFlag { - r1, r2 := endOfText, endOfText - // 0 < pos && pos <= len(i.str) - if uint(pos-1) < uint(len(i.str)) { - r1 = rune(i.str[pos-1]) - if r1 >= utf8.RuneSelf { - r1, _ = utf8.DecodeLastRuneInString(i.str[:pos]) - } - } - // 0 <= pos && pos < len(i.str) - if uint(pos) < uint(len(i.str)) { - r2 = rune(i.str[pos]) - if r2 >= utf8.RuneSelf { - r2, _ = utf8.DecodeRuneInString(i.str[pos:]) - } - } - return newLazyFlag(r1, r2) -} - -// inputBytes scans a byte slice. -type inputBytes struct { - str []byte -} - -func (i *inputBytes) step(pos int) (rune, int) { - if pos < len(i.str) { - c := i.str[pos] - if c < utf8.RuneSelf { - return rune(c), 1 - } - return utf8.DecodeRune(i.str[pos:]) - } - return endOfText, 0 -} - -func (i *inputBytes) canCheckPrefix() bool { - return true -} - -func (i *inputBytes) hasPrefix(re *Regexp) bool { - return bytes.HasPrefix(i.str, re.prefixBytes) -} - -func (i *inputBytes) index(re *Regexp, pos int) int { - return bytes.Index(i.str[pos:], re.prefixBytes) -} - -func (i *inputBytes) context(pos int) lazyFlag { - r1, r2 := endOfText, endOfText - // 0 < pos && pos <= len(i.str) - if uint(pos-1) < uint(len(i.str)) { - r1 = rune(i.str[pos-1]) - if r1 >= utf8.RuneSelf { - r1, _ = utf8.DecodeLastRune(i.str[:pos]) - } - } - // 0 <= pos && pos < len(i.str) - if uint(pos) < uint(len(i.str)) { - r2 = rune(i.str[pos]) - if r2 >= utf8.RuneSelf { - r2, _ = utf8.DecodeRune(i.str[pos:]) - } - } - return newLazyFlag(r1, r2) -} - -// inputReader scans a RuneReader. -type inputReader struct { - r io.RuneReader - atEOT bool - pos int -} - -func (i *inputReader) step(pos int) (rune, int) { - if !i.atEOT && pos != i.pos { - return endOfText, 0 - - } - r, w, err := i.r.ReadRune() - if err != nil { - i.atEOT = true - return endOfText, 0 - } - i.pos += w - return r, w -} - -func (i *inputReader) canCheckPrefix() bool { - return false -} - -func (i *inputReader) hasPrefix(re *Regexp) bool { - return false -} - -func (i *inputReader) index(re *Regexp, pos int) int { - return -1 -} - -func (i *inputReader) context(pos int) lazyFlag { - return 0 // not used -} - -// LiteralPrefix returns a literal string that must begin any match -// of the regular expression re. It returns the boolean true if the -// literal string comprises the entire regular expression. -func (re *Regexp) LiteralPrefix() (prefix string, complete bool) { - return re.prefix, re.prefixComplete -} - -// MatchReader reports whether the text returned by the [io.RuneReader] -// contains any match of the regular expression re. -func (re *Regexp) MatchReader(r io.RuneReader) bool { - return re.doMatch(r, nil, "") -} - -// MatchString reports whether the string s -// contains any match of the regular expression re. -func (re *Regexp) MatchString(s string) bool { - return re.doMatch(nil, nil, s) -} - -// Match reports whether the byte slice b -// contains any match of the regular expression re. -func (re *Regexp) Match(b []byte) bool { - return re.doMatch(nil, b, "") -} - -// MatchReader reports whether the text returned by the RuneReader -// contains any match of the regular expression pattern. -// More complicated queries need to use [Compile] and the full [Regexp] interface. -func MatchReader(pattern string, r io.RuneReader) (matched bool, err error) { - re, err := Compile(pattern) - if err != nil { - return false, err - } - return re.MatchReader(r), nil -} - -// MatchString reports whether the string s -// contains any match of the regular expression pattern. -// More complicated queries need to use [Compile] and the full [Regexp] interface. -func MatchString(pattern string, s string) (matched bool, err error) { - re, err := Compile(pattern) - if err != nil { - return false, err - } - return re.MatchString(s), nil -} - -// Match reports whether the byte slice b -// contains any match of the regular expression pattern. -// More complicated queries need to use [Compile] and the full [Regexp] interface. -func Match(pattern string, b []byte) (matched bool, err error) { - re, err := Compile(pattern) - if err != nil { - return false, err - } - return re.Match(b), nil -} - -// ReplaceAllString returns a copy of src, replacing matches of the [Regexp] -// with the replacement string repl. -// Inside repl, $ signs are interpreted as in [Regexp.Expand]. -func (re *Regexp) ReplaceAllString(src, repl string) string { - n := 2 - if strings.Contains(repl, "$") { - n = 2 * (re.numSubexp + 1) - } - b := re.replaceAll(nil, src, n, func(dst []byte, match []int) []byte { - return re.expand(dst, repl, nil, src, match) - }) - return string(b) -} - -// ReplaceAllLiteralString returns a copy of src, replacing matches of the [Regexp] -// with the replacement string repl. The replacement repl is substituted directly, -// without using [Regexp.Expand]. -func (re *Regexp) ReplaceAllLiteralString(src, repl string) string { - return string(re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte { - return append(dst, repl...) - })) -} - -// ReplaceAllStringFunc returns a copy of src in which all matches of the -// [Regexp] have been replaced by the return value of function repl applied -// to the matched substring. The replacement returned by repl is substituted -// directly, without using [Regexp.Expand]. -func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string { - b := re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte { - return append(dst, repl(src[match[0]:match[1]])...) - }) - return string(b) -} - -func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst []byte, m []int) []byte) []byte { - lastMatchEnd := 0 // end position of the most recent match - searchPos := 0 // position where we next look for a match - var buf []byte - var endPos int - if bsrc != nil { - endPos = len(bsrc) - } else { - endPos = len(src) - } - if nmatch > re.prog.NumCap { - nmatch = re.prog.NumCap - } - - var dstCap [2]int - for searchPos <= endPos { - a := re.doExecute(nil, bsrc, src, searchPos, nmatch, dstCap[:0]) - if len(a) == 0 { - break // no more matches - } - - // Copy the unmatched characters before this match. - if bsrc != nil { - buf = append(buf, bsrc[lastMatchEnd:a[0]]...) - } else { - buf = append(buf, src[lastMatchEnd:a[0]]...) - } - - // Now insert a copy of the replacement string, but not for a - // match of the empty string immediately after another match. - // (Otherwise, we get double replacement for patterns that - // match both empty and nonempty strings.) - if a[1] > lastMatchEnd || a[0] == 0 { - buf = repl(buf, a) - } - lastMatchEnd = a[1] - - // Advance past this match; always advance at least one character. - var width int - if bsrc != nil { - _, width = utf8.DecodeRune(bsrc[searchPos:]) - } else { - _, width = utf8.DecodeRuneInString(src[searchPos:]) - } - if searchPos+width > a[1] { - searchPos += width - } else if searchPos+1 > a[1] { - // This clause is only needed at the end of the input - // string. In that case, DecodeRuneInString returns width=0. - searchPos++ - } else { - searchPos = a[1] - } - } - - // Copy the unmatched characters after the last match. - if bsrc != nil { - buf = append(buf, bsrc[lastMatchEnd:]...) - } else { - buf = append(buf, src[lastMatchEnd:]...) - } - - return buf -} - -// ReplaceAll returns a copy of src, replacing matches of the [Regexp] -// with the replacement text repl. -// Inside repl, $ signs are interpreted as in [Regexp.Expand]. -func (re *Regexp) ReplaceAll(src, repl []byte) []byte { - n := 2 - if bytes.IndexByte(repl, '$') >= 0 { - n = 2 * (re.numSubexp + 1) - } - srepl := "" - b := re.replaceAll(src, "", n, func(dst []byte, match []int) []byte { - if len(srepl) != len(repl) { - srepl = string(repl) - } - return re.expand(dst, srepl, src, "", match) - }) - return b -} - -// ReplaceAllLiteral returns a copy of src, replacing matches of the [Regexp] -// with the replacement bytes repl. The replacement repl is substituted directly, -// without using [Regexp.Expand]. -func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte { - return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte { - return append(dst, repl...) - }) -} - -// ReplaceAllFunc returns a copy of src in which all matches of the -// [Regexp] have been replaced by the return value of function repl applied -// to the matched byte slice. The replacement returned by repl is substituted -// directly, without using [Regexp.Expand]. -func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte { - return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte { - return append(dst, repl(src[match[0]:match[1]])...) - }) -} - -// Bitmap used by func special to check whether a character needs to be escaped. -var specialBytes [16]byte - -// special reports whether byte b needs to be escaped by QuoteMeta. -func special(b byte) bool { - return b < utf8.RuneSelf && specialBytes[b%16]&(1<<(b/16)) != 0 -} - -func init() { - for _, b := range []byte(`\.+*?()|[]{}^$`) { - specialBytes[b%16] |= 1 << (b / 16) - } -} - -// QuoteMeta returns a string that escapes all regular expression metacharacters -// inside the argument text; the returned string is a regular expression matching -// the literal text. -func QuoteMeta(s string) string { - // A byte loop is correct because all metacharacters are ASCII. - var i int - for i = 0; i < len(s); i++ { - if special(s[i]) { - break - } - } - // No meta characters found, so return original string. - if i >= len(s) { - return s - } - - b := make([]byte, 2*len(s)-i) - copy(b, s[:i]) - j := i - for ; i < len(s); i++ { - if special(s[i]) { - b[j] = '\\' - j++ - } - b[j] = s[i] - j++ - } - return string(b[:j]) -} - -// The number of capture values in the program may correspond -// to fewer capturing expressions than are in the regexp. -// For example, "(a){0}" turns into an empty program, so the -// maximum capture in the program is 0 but we need to return -// an expression for \1. Pad appends -1s to the slice a as needed. -func (re *Regexp) pad(a []int) []int { - if a == nil { - // No match. - return nil - } - n := (1 + re.numSubexp) * 2 - for len(a) < n { - a = append(a, -1) - } - return a -} - -// allMatches calls deliver at most n times -// with the location of successive matches in the input text. -// The input text is b if non-nil, otherwise s. -func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) { - var end int - if b == nil { - end = len(s) - } else { - end = len(b) - } - - for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; { - matches := re.doExecute(nil, b, s, pos, re.prog.NumCap, nil) - if len(matches) == 0 { - break - } - - accept := true - if matches[1] == pos { - // We've found an empty match. - if matches[0] == prevMatchEnd { - // We don't allow an empty match right - // after a previous match, so ignore it. - accept = false - } - var width int - if b == nil { - is := inputString{str: s} - _, width = is.step(pos) - } else { - ib := inputBytes{str: b} - _, width = ib.step(pos) - } - if width > 0 { - pos += width - } else { - pos = end + 1 - } - } else { - pos = matches[1] - } - prevMatchEnd = matches[1] - - if accept { - deliver(re.pad(matches)) - i++ - } - } -} - -// Find returns a slice holding the text of the leftmost match in b of the regular expression. -// A return value of nil indicates no match. -func (re *Regexp) Find(b []byte) []byte { - var dstCap [2]int - a := re.doExecute(nil, b, "", 0, 2, dstCap[:0]) - if a == nil { - return nil - } - return b[a[0]:a[1]:a[1]] -} - -// FindIndex returns a two-element slice of integers defining the location of -// the leftmost match in b of the regular expression. The match itself is at -// b[loc[0]:loc[1]]. -// A return value of nil indicates no match. -func (re *Regexp) FindIndex(b []byte) (loc []int) { - a := re.doExecute(nil, b, "", 0, 2, nil) - if a == nil { - return nil - } - return a[0:2] -} - -// FindString returns a string holding the text of the leftmost match in s of the regular -// expression. If there is no match, the return value is an empty string, -// but it will also be empty if the regular expression successfully matches -// an empty string. Use [Regexp.FindStringIndex] or [Regexp.FindStringSubmatch] if it is -// necessary to distinguish these cases. -func (re *Regexp) FindString(s string) string { - var dstCap [2]int - a := re.doExecute(nil, nil, s, 0, 2, dstCap[:0]) - if a == nil { - return "" - } - return s[a[0]:a[1]] -} - -// FindStringIndex returns a two-element slice of integers defining the -// location of the leftmost match in s of the regular expression. The match -// itself is at s[loc[0]:loc[1]]. -// A return value of nil indicates no match. -func (re *Regexp) FindStringIndex(s string) (loc []int) { - a := re.doExecute(nil, nil, s, 0, 2, nil) - if a == nil { - return nil - } - return a[0:2] -} - -// FindReaderIndex returns a two-element slice of integers defining the -// location of the leftmost match of the regular expression in text read from -// the [io.RuneReader]. The match text was found in the input stream at -// byte offset loc[0] through loc[1]-1. -// A return value of nil indicates no match. -func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) { - a := re.doExecute(r, nil, "", 0, 2, nil) - if a == nil { - return nil - } - return a[0:2] -} - -// FindSubmatch returns a slice of slices holding the text of the leftmost -// match of the regular expression in b and the matches, if any, of its -// subexpressions, as defined by the 'Submatch' descriptions in the package -// comment. -// A return value of nil indicates no match. -func (re *Regexp) FindSubmatch(b []byte) [][]byte { - var dstCap [4]int - a := re.doExecute(nil, b, "", 0, re.prog.NumCap, dstCap[:0]) - if a == nil { - return nil - } - ret := make([][]byte, 1+re.numSubexp) - for i := range ret { - if 2*i < len(a) && a[2*i] >= 0 { - ret[i] = b[a[2*i]:a[2*i+1]:a[2*i+1]] - } - } - return ret -} - -// Expand appends template to dst and returns the result; during the -// append, Expand replaces variables in the template with corresponding -// matches drawn from src. The match slice should have been returned by -// [Regexp.FindSubmatchIndex]. -// -// In the template, a variable is denoted by a substring of the form -// $name or ${name}, where name is a non-empty sequence of letters, -// digits, and underscores. A purely numeric name like $1 refers to -// the submatch with the corresponding index; other names refer to -// capturing parentheses named with the (?P...) syntax. A -// reference to an out of range or unmatched index or a name that is not -// present in the regular expression is replaced with an empty slice. -// -// In the $name form, name is taken to be as long as possible: $1x is -// equivalent to ${1x}, not ${1}x, and, $10 is equivalent to ${10}, not ${1}0. -// -// To insert a literal $ in the output, use $$ in the template. -func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte { - return re.expand(dst, string(template), src, "", match) -} - -// ExpandString is like [Regexp.Expand] but the template and source are strings. -// It appends to and returns a byte slice in order to give the calling -// code control over allocation. -func (re *Regexp) ExpandString(dst []byte, template string, src string, match []int) []byte { - return re.expand(dst, template, nil, src, match) -} - -func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, match []int) []byte { - for len(template) > 0 { - before, after, ok := strings.Cut(template, "$") - if !ok { - break - } - dst = append(dst, before...) - template = after - if template != "" && template[0] == '$' { - // Treat $$ as $. - dst = append(dst, '$') - template = template[1:] - continue - } - name, num, rest, ok := extract(template) - if !ok { - // Malformed; treat $ as raw text. - dst = append(dst, '$') - continue - } - template = rest - if num >= 0 { - if 2*num+1 < len(match) && match[2*num] >= 0 { - if bsrc != nil { - dst = append(dst, bsrc[match[2*num]:match[2*num+1]]...) - } else { - dst = append(dst, src[match[2*num]:match[2*num+1]]...) - } - } - } else { - for i, namei := range re.subexpNames { - if name == namei && 2*i+1 < len(match) && match[2*i] >= 0 { - if bsrc != nil { - dst = append(dst, bsrc[match[2*i]:match[2*i+1]]...) - } else { - dst = append(dst, src[match[2*i]:match[2*i+1]]...) - } - break - } - } - } - } - dst = append(dst, template...) - return dst -} - -// extract returns the name from a leading "name" or "{name}" in str. -// (The $ has already been removed by the caller.) -// If it is a number, extract returns num set to that number; otherwise num = -1. -func extract(str string) (name string, num int, rest string, ok bool) { - if str == "" { - return - } - brace := false - if str[0] == '{' { - brace = true - str = str[1:] - } - i := 0 - for i < len(str) { - rune, size := utf8.DecodeRuneInString(str[i:]) - if !unicode.IsLetter(rune) && !unicode.IsDigit(rune) && rune != '_' { - break - } - i += size - } - if i == 0 { - // empty name is not okay - return - } - name = str[:i] - if brace { - if i >= len(str) || str[i] != '}' { - // missing closing brace - return - } - i++ - } - - // Parse number. - num = 0 - for i := 0; i < len(name); i++ { - if name[i] < '0' || '9' < name[i] || num >= 1e8 { - num = -1 - break - } - num = num*10 + int(name[i]) - '0' - } - // Disallow leading zeros. - if name[0] == '0' && len(name) > 1 { - num = -1 - } - - rest = str[i:] - ok = true - return -} - -// FindSubmatchIndex returns a slice holding the index pairs identifying the -// leftmost match of the regular expression in b and the matches, if any, of -// its subexpressions, as defined by the 'Submatch' and 'Index' descriptions -// in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindSubmatchIndex(b []byte) []int { - return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap, nil)) -} - -// FindStringSubmatch returns a slice of strings holding the text of the -// leftmost match of the regular expression in s and the matches, if any, of -// its subexpressions, as defined by the 'Submatch' description in the -// package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindStringSubmatch(s string) []string { - var dstCap [4]int - a := re.doExecute(nil, nil, s, 0, re.prog.NumCap, dstCap[:0]) - if a == nil { - return nil - } - ret := make([]string, 1+re.numSubexp) - for i := range ret { - if 2*i < len(a) && a[2*i] >= 0 { - ret[i] = s[a[2*i]:a[2*i+1]] - } - } - return ret -} - -// FindStringSubmatchIndex returns a slice holding the index pairs -// identifying the leftmost match of the regular expression in s and the -// matches, if any, of its subexpressions, as defined by the 'Submatch' and -// 'Index' descriptions in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindStringSubmatchIndex(s string) []int { - return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap, nil)) -} - -// FindReaderSubmatchIndex returns a slice holding the index pairs -// identifying the leftmost match of the regular expression of text read by -// the [io.RuneReader], and the matches, if any, of its subexpressions, as defined -// by the 'Submatch' and 'Index' descriptions in the package comment. A -// return value of nil indicates no match. -func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int { - return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap, nil)) -} - -const startSize = 10 // The size at which to start a slice in the 'All' routines. - -// FindAll is the 'All' version of [Regexp.Find]; it returns a slice of all successive -// matches of the expression, as defined by the 'All' description in the -// package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAll(b []byte, n int) [][]byte { - if n < 0 { - n = len(b) + 1 - } - var result [][]byte - re.allMatches("", b, n, func(match []int) { - if result == nil { - result = make([][]byte, 0, startSize) - } - result = append(result, b[match[0]:match[1]:match[1]]) - }) - return result -} - -// FindAllIndex is the 'All' version of [Regexp.FindIndex]; it returns a slice of all -// successive matches of the expression, as defined by the 'All' description -// in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAllIndex(b []byte, n int) [][]int { - if n < 0 { - n = len(b) + 1 - } - var result [][]int - re.allMatches("", b, n, func(match []int) { - if result == nil { - result = make([][]int, 0, startSize) - } - result = append(result, match[0:2]) - }) - return result -} - -// FindAllString is the 'All' version of [Regexp.FindString]; it returns a slice of all -// successive matches of the expression, as defined by the 'All' description -// in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAllString(s string, n int) []string { - if n < 0 { - n = len(s) + 1 - } - var result []string - re.allMatches(s, nil, n, func(match []int) { - if result == nil { - result = make([]string, 0, startSize) - } - result = append(result, s[match[0]:match[1]]) - }) - return result -} - -// FindAllStringIndex is the 'All' version of [Regexp.FindStringIndex]; it returns a -// slice of all successive matches of the expression, as defined by the 'All' -// description in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAllStringIndex(s string, n int) [][]int { - if n < 0 { - n = len(s) + 1 - } - var result [][]int - re.allMatches(s, nil, n, func(match []int) { - if result == nil { - result = make([][]int, 0, startSize) - } - result = append(result, match[0:2]) - }) - return result -} - -// FindAllSubmatch is the 'All' version of [Regexp.FindSubmatch]; it returns a slice -// of all successive matches of the expression, as defined by the 'All' -// description in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte { - if n < 0 { - n = len(b) + 1 - } - var result [][][]byte - re.allMatches("", b, n, func(match []int) { - if result == nil { - result = make([][][]byte, 0, startSize) - } - slice := make([][]byte, len(match)/2) - for j := range slice { - if match[2*j] >= 0 { - slice[j] = b[match[2*j]:match[2*j+1]:match[2*j+1]] - } - } - result = append(result, slice) - }) - return result -} - -// FindAllSubmatchIndex is the 'All' version of [Regexp.FindSubmatchIndex]; it returns -// a slice of all successive matches of the expression, as defined by the -// 'All' description in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int { - if n < 0 { - n = len(b) + 1 - } - var result [][]int - re.allMatches("", b, n, func(match []int) { - if result == nil { - result = make([][]int, 0, startSize) - } - result = append(result, match) - }) - return result -} - -// FindAllStringSubmatch is the 'All' version of [Regexp.FindStringSubmatch]; it -// returns a slice of all successive matches of the expression, as defined by -// the 'All' description in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string { - if n < 0 { - n = len(s) + 1 - } - var result [][]string - re.allMatches(s, nil, n, func(match []int) { - if result == nil { - result = make([][]string, 0, startSize) - } - slice := make([]string, len(match)/2) - for j := range slice { - if match[2*j] >= 0 { - slice[j] = s[match[2*j]:match[2*j+1]] - } - } - result = append(result, slice) - }) - return result -} - -// FindAllStringSubmatchIndex is the 'All' version of -// [Regexp.FindStringSubmatchIndex]; it returns a slice of all successive matches of -// the expression, as defined by the 'All' description in the package -// comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int { - if n < 0 { - n = len(s) + 1 - } - var result [][]int - re.allMatches(s, nil, n, func(match []int) { - if result == nil { - result = make([][]int, 0, startSize) - } - result = append(result, match) - }) - return result -} - -// Split slices s into substrings separated by the expression and returns a slice of -// the substrings between those expression matches. -// -// The slice returned by this method consists of all the substrings of s -// not contained in the slice returned by [Regexp.FindAllString]. When called on an expression -// that contains no metacharacters, it is equivalent to [strings.SplitN]. -// -// Example: -// -// s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5) -// // s: ["", "b", "b", "c", "cadaaae"] -// -// The count determines the number of substrings to return: -// -// n > 0: at most n substrings; the last substring will be the unsplit remainder. -// n == 0: the result is nil (zero substrings) -// n < 0: all substrings -func (re *Regexp) Split(s string, n int) []string { - - if n == 0 { - return nil - } - - if len(re.expr) > 0 && len(s) == 0 { - return []string{""} - } - - matches := re.FindAllStringIndex(s, n) - strings := make([]string, 0, len(matches)) - - beg := 0 - end := 0 - for _, match := range matches { - if n > 0 && len(strings) >= n-1 { - break - } - - end = match[0] - if match[1] != 0 { - strings = append(strings, s[beg:end]) - } - beg = match[1] - } - - if end != len(s) { - strings = append(strings, s[beg:]) - } - - return strings -} - -// MarshalText implements [encoding.TextMarshaler]. The output -// matches that of calling the [Regexp.String] method. -// -// Note that the output is lossy in some cases: This method does not indicate -// POSIX regular expressions (i.e. those compiled by calling [CompilePOSIX]), or -// those for which the [Regexp.Longest] method has been called. -func (re *Regexp) MarshalText() ([]byte, error) { - return []byte(re.String()), nil -} - -// UnmarshalText implements [encoding.TextUnmarshaler] by calling -// [Compile] on the encoded value. -func (re *Regexp) UnmarshalText(text []byte) error { - newRE, err := Compile(string(text)) - if err != nil { - return err - } - *re = *newRE - return nil -} -- cgit v1.2.3