diff options
Diffstat (limited to 'vendor/github.com/grafana')
| -rw-r--r-- | vendor/github.com/grafana/regexp/.gitignore | 15 | ||||
| -rw-r--r-- | vendor/github.com/grafana/regexp/LICENSE | 27 | ||||
| -rw-r--r-- | vendor/github.com/grafana/regexp/README.md | 12 | ||||
| -rw-r--r-- | vendor/github.com/grafana/regexp/backtrack.go | 365 | ||||
| -rw-r--r-- | vendor/github.com/grafana/regexp/exec.go | 554 | ||||
| -rw-r--r-- | vendor/github.com/grafana/regexp/onepass.go | 500 | ||||
| -rw-r--r-- | vendor/github.com/grafana/regexp/regexp.go | 1304 |
7 files changed, 0 insertions, 2777 deletions
diff --git a/vendor/github.com/grafana/regexp/.gitignore b/vendor/github.com/grafana/regexp/.gitignore deleted file mode 100644 index 66fd13c90..000000000 --- a/vendor/github.com/grafana/regexp/.gitignore +++ /dev/null @@ -1,15 +0,0 @@ -# Binaries for programs and plugins -*.exe -*.exe~ -*.dll -*.so -*.dylib - -# Test binary, built with `go test -c` -*.test - -# Output of the go coverage tool, specifically when used with LiteIDE -*.out - -# Dependency directories (remove the comment below to include it) -# vendor/ diff --git a/vendor/github.com/grafana/regexp/LICENSE b/vendor/github.com/grafana/regexp/LICENSE deleted file mode 100644 index 6a66aea5e..000000000 --- a/vendor/github.com/grafana/regexp/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/grafana/regexp/README.md b/vendor/github.com/grafana/regexp/README.md deleted file mode 100644 index 756e60dcf..000000000 --- a/vendor/github.com/grafana/regexp/README.md +++ /dev/null @@ -1,12 +0,0 @@ -# Grafana Go regexp package -This repo is a fork of the upstream Go `regexp` package, with some code optimisations to make it run faster. - -All the optimisations have been submitted upstream, but not yet merged. - -All semantics are the same, and the optimised code passes all tests from upstream. - -The `main` branch is non-optimised: switch over to [`speedup`](https://github.com/grafana/regexp/tree/speedup) branch for the improved code. - -## Benchmarks: - - diff --git a/vendor/github.com/grafana/regexp/backtrack.go b/vendor/github.com/grafana/regexp/backtrack.go deleted file mode 100644 index 7c37c66a8..000000000 --- a/vendor/github.com/grafana/regexp/backtrack.go +++ /dev/null @@ -1,365 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// backtrack is a regular expression search with submatch -// tracking for small regular expressions and texts. It allocates -// a bit vector with (length of input) * (length of prog) bits, -// to make sure it never explores the same (character position, instruction) -// state multiple times. This limits the search to run in time linear in -// the length of the test. -// -// backtrack is a fast replacement for the NFA code on small -// regexps when onepass cannot be used. - -package regexp - -import ( - "regexp/syntax" - "sync" -) - -// A job is an entry on the backtracker's job stack. It holds -// the instruction pc and the position in the input. -type job struct { - pc uint32 - arg bool - pos int -} - -const ( - visitedBits = 32 - maxBacktrackProg = 500 // len(prog.Inst) <= max - maxBacktrackVector = 256 * 1024 // bit vector size <= max (bits) -) - -// bitState holds state for the backtracker. -type bitState struct { - end int - cap []int - matchcap []int - jobs []job - visited []uint32 - - inputs inputs -} - -var bitStatePool sync.Pool - -func newBitState() *bitState { - b, ok := bitStatePool.Get().(*bitState) - if !ok { - b = new(bitState) - } - return b -} - -func freeBitState(b *bitState) { - b.inputs.clear() - bitStatePool.Put(b) -} - -// maxBitStateLen returns the maximum length of a string to search with -// the backtracker using prog. -func maxBitStateLen(prog *syntax.Prog) int { - if !shouldBacktrack(prog) { - return 0 - } - return maxBacktrackVector / len(prog.Inst) -} - -// shouldBacktrack reports whether the program is too -// long for the backtracker to run. -func shouldBacktrack(prog *syntax.Prog) bool { - return len(prog.Inst) <= maxBacktrackProg -} - -// reset resets the state of the backtracker. -// end is the end position in the input. -// ncap is the number of captures. -func (b *bitState) reset(prog *syntax.Prog, end int, ncap int) { - b.end = end - - if cap(b.jobs) == 0 { - b.jobs = make([]job, 0, 256) - } else { - b.jobs = b.jobs[:0] - } - - visitedSize := (len(prog.Inst)*(end+1) + visitedBits - 1) / visitedBits - if cap(b.visited) < visitedSize { - b.visited = make([]uint32, visitedSize, maxBacktrackVector/visitedBits) - } else { - b.visited = b.visited[:visitedSize] - clear(b.visited) // set to 0 - } - - if cap(b.cap) < ncap { - b.cap = make([]int, ncap) - } else { - b.cap = b.cap[:ncap] - } - for i := range b.cap { - b.cap[i] = -1 - } - - if cap(b.matchcap) < ncap { - b.matchcap = make([]int, ncap) - } else { - b.matchcap = b.matchcap[:ncap] - } - for i := range b.matchcap { - b.matchcap[i] = -1 - } -} - -// shouldVisit reports whether the combination of (pc, pos) has not -// been visited yet. -func (b *bitState) shouldVisit(pc uint32, pos int) bool { - n := uint(int(pc)*(b.end+1) + pos) - if b.visited[n/visitedBits]&(1<<(n&(visitedBits-1))) != 0 { - return false - } - b.visited[n/visitedBits] |= 1 << (n & (visitedBits - 1)) - return true -} - -// push pushes (pc, pos, arg) onto the job stack if it should be -// visited. -func (b *bitState) push(re *Regexp, pc uint32, pos int, arg bool) { - // Only check shouldVisit when arg is false. - // When arg is true, we are continuing a previous visit. - if re.prog.Inst[pc].Op != syntax.InstFail && (arg || b.shouldVisit(pc, pos)) { - b.jobs = append(b.jobs, job{pc: pc, arg: arg, pos: pos}) - } -} - -// tryBacktrack runs a backtracking search starting at pos. -func (re *Regexp) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { - longest := re.longest - - b.push(re, pc, pos, false) - for len(b.jobs) > 0 { - l := len(b.jobs) - 1 - // Pop job off the stack. - pc := b.jobs[l].pc - pos := b.jobs[l].pos - arg := b.jobs[l].arg - b.jobs = b.jobs[:l] - - // Optimization: rather than push and pop, - // code that is going to Push and continue - // the loop simply updates ip, p, and arg - // and jumps to CheckAndLoop. We have to - // do the ShouldVisit check that Push - // would have, but we avoid the stack - // manipulation. - goto Skip - CheckAndLoop: - if !b.shouldVisit(pc, pos) { - continue - } - Skip: - - inst := &re.prog.Inst[pc] - - switch inst.Op { - default: - panic("bad inst") - case syntax.InstFail: - panic("unexpected InstFail") - case syntax.InstAlt: - // Cannot just - // b.push(inst.Out, pos, false) - // b.push(inst.Arg, pos, false) - // If during the processing of inst.Out, we encounter - // inst.Arg via another path, we want to process it then. - // Pushing it here will inhibit that. Instead, re-push - // inst with arg==true as a reminder to push inst.Arg out - // later. - if arg { - // Finished inst.Out; try inst.Arg. - arg = false - pc = inst.Arg - goto CheckAndLoop - } else { - b.push(re, pc, pos, true) - pc = inst.Out - goto CheckAndLoop - } - - case syntax.InstAltMatch: - // One opcode consumes runes; the other leads to match. - switch re.prog.Inst[inst.Out].Op { - case syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL: - // inst.Arg is the match. - b.push(re, inst.Arg, pos, false) - pc = inst.Arg - pos = b.end - goto CheckAndLoop - } - // inst.Out is the match - non-greedy - b.push(re, inst.Out, b.end, false) - pc = inst.Out - goto CheckAndLoop - - case syntax.InstRune: - r, width := i.step(pos) - if !inst.MatchRune(r) { - continue - } - pos += width - pc = inst.Out - goto CheckAndLoop - - case syntax.InstRune1: - r, width := i.step(pos) - if r != inst.Rune[0] { - continue - } - pos += width - pc = inst.Out - goto CheckAndLoop - - case syntax.InstRuneAnyNotNL: - r, width := i.step(pos) - if r == '\n' || r == endOfText { - continue - } - pos += width - pc = inst.Out - goto CheckAndLoop - - case syntax.InstRuneAny: - r, width := i.step(pos) - if r == endOfText { - continue - } - pos += width - pc = inst.Out - goto CheckAndLoop - - case syntax.InstCapture: - if arg { - // Finished inst.Out; restore the old value. - b.cap[inst.Arg] = pos - continue - } else { - if inst.Arg < uint32(len(b.cap)) { - // Capture pos to register, but save old value. - b.push(re, pc, b.cap[inst.Arg], true) // come back when we're done. - b.cap[inst.Arg] = pos - } - pc = inst.Out - goto CheckAndLoop - } - - case syntax.InstEmptyWidth: - flag := i.context(pos) - if !flag.match(syntax.EmptyOp(inst.Arg)) { - continue - } - pc = inst.Out - goto CheckAndLoop - - case syntax.InstNop: - pc = inst.Out - goto CheckAndLoop - - case syntax.InstMatch: - // We found a match. If the caller doesn't care - // where the match is, no point going further. - if len(b.cap) == 0 { - return true - } - - // Record best match so far. - // Only need to check end point, because this entire - // call is only considering one start position. - if len(b.cap) > 1 { - b.cap[1] = pos - } - if old := b.matchcap[1]; old == -1 || (longest && pos > 0 && pos > old) { - copy(b.matchcap, b.cap) - } - - // If going for first match, we're done. - if !longest { - return true - } - - // If we used the entire text, no longer match is possible. - if pos == b.end { - return true - } - - // Otherwise, continue on in hope of a longer match. - continue - } - } - - return longest && len(b.matchcap) > 1 && b.matchcap[1] >= 0 -} - -// backtrack runs a backtracking search of prog on the input starting at pos. -func (re *Regexp) backtrack(ib []byte, is string, pos int, ncap int, dstCap []int) []int { - startCond := re.cond - if startCond == ^syntax.EmptyOp(0) { // impossible - return nil - } - if startCond&syntax.EmptyBeginText != 0 && pos != 0 { - // Anchored match, past beginning of text. - return nil - } - - b := newBitState() - i, end := b.inputs.init(nil, ib, is) - b.reset(re.prog, end, ncap) - - // Anchored search must start at the beginning of the input - if startCond&syntax.EmptyBeginText != 0 { - if len(b.cap) > 0 { - b.cap[0] = pos - } - if !re.tryBacktrack(b, i, uint32(re.prog.Start), pos) { - freeBitState(b) - return nil - } - } else { - - // Unanchored search, starting from each possible text position. - // Notice that we have to try the empty string at the end of - // the text, so the loop condition is pos <= end, not pos < end. - // This looks like it's quadratic in the size of the text, - // but we are not clearing visited between calls to TrySearch, - // so no work is duplicated and it ends up still being linear. - width := -1 - for ; pos <= end && width != 0; pos += width { - if len(re.prefix) > 0 { - // Match requires literal prefix; fast search for it. - advance := i.index(re, pos) - if advance < 0 { - freeBitState(b) - return nil - } - pos += advance - } - - if len(b.cap) > 0 { - b.cap[0] = pos - } - if re.tryBacktrack(b, i, uint32(re.prog.Start), pos) { - // Match must be leftmost; done. - goto Match - } - _, width = i.step(pos) - } - freeBitState(b) - return nil - } - -Match: - dstCap = append(dstCap, b.matchcap...) - freeBitState(b) - return dstCap -} diff --git a/vendor/github.com/grafana/regexp/exec.go b/vendor/github.com/grafana/regexp/exec.go deleted file mode 100644 index 3fc4b684f..000000000 --- a/vendor/github.com/grafana/regexp/exec.go +++ /dev/null @@ -1,554 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package regexp - -import ( - "io" - "regexp/syntax" - "sync" -) - -// A queue is a 'sparse array' holding pending threads of execution. -// See https://research.swtch.com/2008/03/using-uninitialized-memory-for-fun-and.html -type queue struct { - sparse []uint32 - dense []entry -} - -// An entry is an entry on a queue. -// It holds both the instruction pc and the actual thread. -// Some queue entries are just place holders so that the machine -// knows it has considered that pc. Such entries have t == nil. -type entry struct { - pc uint32 - t *thread -} - -// A thread is the state of a single path through the machine: -// an instruction and a corresponding capture array. -// See https://swtch.com/~rsc/regexp/regexp2.html -type thread struct { - inst *syntax.Inst - cap []int -} - -// A machine holds all the state during an NFA simulation for p. -type machine struct { - re *Regexp // corresponding Regexp - p *syntax.Prog // compiled program - q0, q1 queue // two queues for runq, nextq - pool []*thread // pool of available threads - matched bool // whether a match was found - matchcap []int // capture information for the match - - inputs inputs -} - -type inputs struct { - // cached inputs, to avoid allocation - bytes inputBytes - string inputString - reader inputReader -} - -func (i *inputs) newBytes(b []byte) input { - i.bytes.str = b - return &i.bytes -} - -func (i *inputs) newString(s string) input { - i.string.str = s - return &i.string -} - -func (i *inputs) newReader(r io.RuneReader) input { - i.reader.r = r - i.reader.atEOT = false - i.reader.pos = 0 - return &i.reader -} - -func (i *inputs) clear() { - // We need to clear 1 of these. - // Avoid the expense of clearing the others (pointer write barrier). - if i.bytes.str != nil { - i.bytes.str = nil - } else if i.reader.r != nil { - i.reader.r = nil - } else { - i.string.str = "" - } -} - -func (i *inputs) init(r io.RuneReader, b []byte, s string) (input, int) { - if r != nil { - return i.newReader(r), 0 - } - if b != nil { - return i.newBytes(b), len(b) - } - return i.newString(s), len(s) -} - -func (m *machine) init(ncap int) { - for _, t := range m.pool { - t.cap = t.cap[:ncap] - } - m.matchcap = m.matchcap[:ncap] -} - -// alloc allocates a new thread with the given instruction. -// It uses the free pool if possible. -func (m *machine) alloc(i *syntax.Inst) *thread { - var t *thread - if n := len(m.pool); n > 0 { - t = m.pool[n-1] - m.pool = m.pool[:n-1] - } else { - t = new(thread) - t.cap = make([]int, len(m.matchcap), cap(m.matchcap)) - } - t.inst = i - return t -} - -// A lazyFlag is a lazily-evaluated syntax.EmptyOp, -// for checking zero-width flags like ^ $ \A \z \B \b. -// It records the pair of relevant runes and does not -// determine the implied flags until absolutely necessary -// (most of the time, that means never). -type lazyFlag uint64 - -func newLazyFlag(r1, r2 rune) lazyFlag { - return lazyFlag(uint64(r1)<<32 | uint64(uint32(r2))) -} - -func (f lazyFlag) match(op syntax.EmptyOp) bool { - if op == 0 { - return true - } - r1 := rune(f >> 32) - if op&syntax.EmptyBeginLine != 0 { - if r1 != '\n' && r1 >= 0 { - return false - } - op &^= syntax.EmptyBeginLine - } - if op&syntax.EmptyBeginText != 0 { - if r1 >= 0 { - return false - } - op &^= syntax.EmptyBeginText - } - if op == 0 { - return true - } - r2 := rune(f) - if op&syntax.EmptyEndLine != 0 { - if r2 != '\n' && r2 >= 0 { - return false - } - op &^= syntax.EmptyEndLine - } - if op&syntax.EmptyEndText != 0 { - if r2 >= 0 { - return false - } - op &^= syntax.EmptyEndText - } - if op == 0 { - return true - } - if syntax.IsWordChar(r1) != syntax.IsWordChar(r2) { - op &^= syntax.EmptyWordBoundary - } else { - op &^= syntax.EmptyNoWordBoundary - } - return op == 0 -} - -// match runs the machine over the input starting at pos. -// It reports whether a match was found. -// If so, m.matchcap holds the submatch information. -func (m *machine) match(i input, pos int) bool { - startCond := m.re.cond - if startCond == ^syntax.EmptyOp(0) { // impossible - return false - } - m.matched = false - for i := range m.matchcap { - m.matchcap[i] = -1 - } - runq, nextq := &m.q0, &m.q1 - r, r1 := endOfText, endOfText - width, width1 := 0, 0 - r, width = i.step(pos) - if r != endOfText { - r1, width1 = i.step(pos + width) - } - var flag lazyFlag - if pos == 0 { - flag = newLazyFlag(-1, r) - } else { - flag = i.context(pos) - } - for { - if len(runq.dense) == 0 { - if startCond&syntax.EmptyBeginText != 0 && pos != 0 { - // Anchored match, past beginning of text. - break - } - if m.matched { - // Have match; finished exploring alternatives. - break - } - if len(m.re.prefix) > 0 && r1 != m.re.prefixRune && i.canCheckPrefix() { - // Match requires literal prefix; fast search for it. - advance := i.index(m.re, pos) - if advance < 0 { - break - } - pos += advance - r, width = i.step(pos) - r1, width1 = i.step(pos + width) - } - } - if !m.matched { - if len(m.matchcap) > 0 { - m.matchcap[0] = pos - } - m.add(runq, uint32(m.p.Start), pos, m.matchcap, &flag, nil) - } - flag = newLazyFlag(r, r1) - m.step(runq, nextq, pos, pos+width, r, &flag) - if width == 0 { - break - } - if len(m.matchcap) == 0 && m.matched { - // Found a match and not paying attention - // to where it is, so any match will do. - break - } - pos += width - r, width = r1, width1 - if r != endOfText { - r1, width1 = i.step(pos + width) - } - runq, nextq = nextq, runq - } - m.clear(nextq) - return m.matched -} - -// clear frees all threads on the thread queue. -func (m *machine) clear(q *queue) { - for _, d := range q.dense { - if d.t != nil { - m.pool = append(m.pool, d.t) - } - } - q.dense = q.dense[:0] -} - -// step executes one step of the machine, running each of the threads -// on runq and appending new threads to nextq. -// The step processes the rune c (which may be endOfText), -// which starts at position pos and ends at nextPos. -// nextCond gives the setting for the empty-width flags after c. -func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond *lazyFlag) { - longest := m.re.longest - for j := 0; j < len(runq.dense); j++ { - d := &runq.dense[j] - t := d.t - if t == nil { - continue - } - if longest && m.matched && len(t.cap) > 0 && m.matchcap[0] < t.cap[0] { - m.pool = append(m.pool, t) - continue - } - i := t.inst - add := false - switch i.Op { - default: - panic("bad inst") - - case syntax.InstMatch: - if len(t.cap) > 0 && (!longest || !m.matched || m.matchcap[1] < pos) { - t.cap[1] = pos - copy(m.matchcap, t.cap) - } - if !longest { - // First-match mode: cut off all lower-priority threads. - for _, d := range runq.dense[j+1:] { - if d.t != nil { - m.pool = append(m.pool, d.t) - } - } - runq.dense = runq.dense[:0] - } - m.matched = true - - case syntax.InstRune: - add = i.MatchRune(c) - case syntax.InstRune1: - add = c == i.Rune[0] - case syntax.InstRuneAny: - add = true - case syntax.InstRuneAnyNotNL: - add = c != '\n' - } - if add { - t = m.add(nextq, i.Out, nextPos, t.cap, nextCond, t) - } - if t != nil { - m.pool = append(m.pool, t) - } - } - runq.dense = runq.dense[:0] -} - -// add adds an entry to q for pc, unless the q already has such an entry. -// It also recursively adds an entry for all instructions reachable from pc by following -// empty-width conditions satisfied by cond. pos gives the current position -// in the input. -func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond *lazyFlag, t *thread) *thread { -Again: - if pc == 0 { - return t - } - if j := q.sparse[pc]; j < uint32(len(q.dense)) && q.dense[j].pc == pc { - return t - } - - j := len(q.dense) - q.dense = q.dense[:j+1] - d := &q.dense[j] - d.t = nil - d.pc = pc - q.sparse[pc] = uint32(j) - - i := &m.p.Inst[pc] - switch i.Op { - default: - panic("unhandled") - case syntax.InstFail: - // nothing - case syntax.InstAlt, syntax.InstAltMatch: - t = m.add(q, i.Out, pos, cap, cond, t) - pc = i.Arg - goto Again - case syntax.InstEmptyWidth: - if cond.match(syntax.EmptyOp(i.Arg)) { - pc = i.Out - goto Again - } - case syntax.InstNop: - pc = i.Out - goto Again - case syntax.InstCapture: - if int(i.Arg) < len(cap) { - opos := cap[i.Arg] - cap[i.Arg] = pos - m.add(q, i.Out, pos, cap, cond, nil) - cap[i.Arg] = opos - } else { - pc = i.Out - goto Again - } - case syntax.InstMatch, syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL: - if t == nil { - t = m.alloc(i) - } else { - t.inst = i - } - if len(cap) > 0 && &t.cap[0] != &cap[0] { - copy(t.cap, cap) - } - d.t = t - t = nil - } - return t -} - -type onePassMachine struct { - inputs inputs - matchcap []int -} - -var onePassPool sync.Pool - -func newOnePassMachine() *onePassMachine { - m, ok := onePassPool.Get().(*onePassMachine) - if !ok { - m = new(onePassMachine) - } - return m -} - -func freeOnePassMachine(m *onePassMachine) { - m.inputs.clear() - onePassPool.Put(m) -} - -// doOnePass implements r.doExecute using the one-pass execution engine. -func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap int, dstCap []int) []int { - startCond := re.cond - if startCond == ^syntax.EmptyOp(0) { // impossible - return nil - } - - m := newOnePassMachine() - if cap(m.matchcap) < ncap { - m.matchcap = make([]int, ncap) - } else { - m.matchcap = m.matchcap[:ncap] - } - - matched := false - for i := range m.matchcap { - m.matchcap[i] = -1 - } - - i, _ := m.inputs.init(ir, ib, is) - - r, r1 := endOfText, endOfText - width, width1 := 0, 0 - r, width = i.step(pos) - if r != endOfText { - r1, width1 = i.step(pos + width) - } - var flag lazyFlag - if pos == 0 { - flag = newLazyFlag(-1, r) - } else { - flag = i.context(pos) - } - pc := re.onepass.Start - inst := &re.onepass.Inst[pc] - // If there is a simple literal prefix, skip over it. - if pos == 0 && flag.match(syntax.EmptyOp(inst.Arg)) && - len(re.prefix) > 0 && i.canCheckPrefix() { - // Match requires literal prefix; fast search for it. - if !i.hasPrefix(re) { - goto Return - } - pos += len(re.prefix) - r, width = i.step(pos) - r1, width1 = i.step(pos + width) - flag = i.context(pos) - pc = int(re.prefixEnd) - } - for { - inst = &re.onepass.Inst[pc] - pc = int(inst.Out) - switch inst.Op { - default: - panic("bad inst") - case syntax.InstMatch: - matched = true - if len(m.matchcap) > 0 { - m.matchcap[0] = 0 - m.matchcap[1] = pos - } - goto Return - case syntax.InstRune: - if !inst.MatchRune(r) { - goto Return - } - case syntax.InstRune1: - if r != inst.Rune[0] { - goto Return - } - case syntax.InstRuneAny: - // Nothing - case syntax.InstRuneAnyNotNL: - if r == '\n' { - goto Return - } - // peek at the input rune to see which branch of the Alt to take - case syntax.InstAlt, syntax.InstAltMatch: - pc = int(onePassNext(inst, r)) - continue - case syntax.InstFail: - goto Return - case syntax.InstNop: - continue - case syntax.InstEmptyWidth: - if !flag.match(syntax.EmptyOp(inst.Arg)) { - goto Return - } - continue - case syntax.InstCapture: - if int(inst.Arg) < len(m.matchcap) { - m.matchcap[inst.Arg] = pos - } - continue - } - if width == 0 { - break - } - flag = newLazyFlag(r, r1) - pos += width - r, width = r1, width1 - if r != endOfText { - r1, width1 = i.step(pos + width) - } - } - -Return: - if !matched { - freeOnePassMachine(m) - return nil - } - - dstCap = append(dstCap, m.matchcap...) - freeOnePassMachine(m) - return dstCap -} - -// doMatch reports whether either r, b or s match the regexp. -func (re *Regexp) doMatch(r io.RuneReader, b []byte, s string) bool { - return re.doExecute(r, b, s, 0, 0, nil) != nil -} - -// doExecute finds the leftmost match in the input, appends the position -// of its subexpressions to dstCap and returns dstCap. -// -// nil is returned if no matches are found and non-nil if matches are found. -func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int, dstCap []int) []int { - if dstCap == nil { - // Make sure 'return dstCap' is non-nil. - dstCap = arrayNoInts[:0:0] - } - - if r == nil && len(b)+len(s) < re.minInputLen { - return nil - } - - if re.onepass != nil { - return re.doOnePass(r, b, s, pos, ncap, dstCap) - } - if r == nil && len(b)+len(s) < re.maxBitStateLen { - return re.backtrack(b, s, pos, ncap, dstCap) - } - - m := re.get() - i, _ := m.inputs.init(r, b, s) - - m.init(ncap) - if !m.match(i, pos) { - re.put(m) - return nil - } - - dstCap = append(dstCap, m.matchcap...) - re.put(m) - return dstCap -} - -// arrayNoInts is returned by doExecute match if nil dstCap is passed -// to it with ncap=0. -var arrayNoInts [0]int diff --git a/vendor/github.com/grafana/regexp/onepass.go b/vendor/github.com/grafana/regexp/onepass.go deleted file mode 100644 index 53cbd9583..000000000 --- a/vendor/github.com/grafana/regexp/onepass.go +++ /dev/null @@ -1,500 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package regexp - -import ( - "regexp/syntax" - "slices" - "strings" - "unicode" - "unicode/utf8" -) - -// "One-pass" regexp execution. -// Some regexps can be analyzed to determine that they never need -// backtracking: they are guaranteed to run in one pass over the string -// without bothering to save all the usual NFA state. -// Detect those and execute them more quickly. - -// A onePassProg is a compiled one-pass regular expression program. -// It is the same as syntax.Prog except for the use of onePassInst. -type onePassProg struct { - Inst []onePassInst - Start int // index of start instruction - NumCap int // number of InstCapture insts in re -} - -// A onePassInst is a single instruction in a one-pass regular expression program. -// It is the same as syntax.Inst except for the new 'Next' field. -type onePassInst struct { - syntax.Inst - Next []uint32 -} - -// onePassPrefix returns a literal string that all matches for the -// regexp must start with. Complete is true if the prefix -// is the entire match. Pc is the index of the last rune instruction -// in the string. The onePassPrefix skips over the mandatory -// EmptyBeginText. -func onePassPrefix(p *syntax.Prog) (prefix string, complete bool, pc uint32) { - i := &p.Inst[p.Start] - if i.Op != syntax.InstEmptyWidth || (syntax.EmptyOp(i.Arg))&syntax.EmptyBeginText == 0 { - return "", i.Op == syntax.InstMatch, uint32(p.Start) - } - pc = i.Out - i = &p.Inst[pc] - for i.Op == syntax.InstNop { - pc = i.Out - i = &p.Inst[pc] - } - // Avoid allocation of buffer if prefix is empty. - if iop(i) != syntax.InstRune || len(i.Rune) != 1 { - return "", i.Op == syntax.InstMatch, uint32(p.Start) - } - - // Have prefix; gather characters. - var buf strings.Builder - for iop(i) == syntax.InstRune && len(i.Rune) == 1 && syntax.Flags(i.Arg)&syntax.FoldCase == 0 && i.Rune[0] != utf8.RuneError { - buf.WriteRune(i.Rune[0]) - pc, i = i.Out, &p.Inst[i.Out] - } - if i.Op == syntax.InstEmptyWidth && - syntax.EmptyOp(i.Arg)&syntax.EmptyEndText != 0 && - p.Inst[i.Out].Op == syntax.InstMatch { - complete = true - } - return buf.String(), complete, pc -} - -// onePassNext selects the next actionable state of the prog, based on the input character. -// It should only be called when i.Op == InstAlt or InstAltMatch, and from the one-pass machine. -// One of the alternates may ultimately lead without input to end of line. If the instruction -// is InstAltMatch the path to the InstMatch is in i.Out, the normal node in i.Next. -func onePassNext(i *onePassInst, r rune) uint32 { - next := i.MatchRunePos(r) - if next >= 0 { - return i.Next[next] - } - if i.Op == syntax.InstAltMatch { - return i.Out - } - return 0 -} - -func iop(i *syntax.Inst) syntax.InstOp { - op := i.Op - switch op { - case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL: - op = syntax.InstRune - } - return op -} - -// Sparse Array implementation is used as a queueOnePass. -type queueOnePass struct { - sparse []uint32 - dense []uint32 - size, nextIndex uint32 -} - -func (q *queueOnePass) empty() bool { - return q.nextIndex >= q.size -} - -func (q *queueOnePass) next() (n uint32) { - n = q.dense[q.nextIndex] - q.nextIndex++ - return -} - -func (q *queueOnePass) clear() { - q.size = 0 - q.nextIndex = 0 -} - -func (q *queueOnePass) contains(u uint32) bool { - if u >= uint32(len(q.sparse)) { - return false - } - return q.sparse[u] < q.size && q.dense[q.sparse[u]] == u -} - -func (q *queueOnePass) insert(u uint32) { - if !q.contains(u) { - q.insertNew(u) - } -} - -func (q *queueOnePass) insertNew(u uint32) { - if u >= uint32(len(q.sparse)) { - return - } - q.sparse[u] = q.size - q.dense[q.size] = u - q.size++ -} - -func newQueue(size int) (q *queueOnePass) { - return &queueOnePass{ - sparse: make([]uint32, size), - dense: make([]uint32, size), - } -} - -// mergeRuneSets merges two non-intersecting runesets, and returns the merged result, -// and a NextIp array. The idea is that if a rune matches the OnePassRunes at index -// i, NextIp[i/2] is the target. If the input sets intersect, an empty runeset and a -// NextIp array with the single element mergeFailed is returned. -// The code assumes that both inputs contain ordered and non-intersecting rune pairs. -const mergeFailed = uint32(0xffffffff) - -var ( - noRune = []rune{} - noNext = []uint32{mergeFailed} -) - -func mergeRuneSets(leftRunes, rightRunes *[]rune, leftPC, rightPC uint32) ([]rune, []uint32) { - leftLen := len(*leftRunes) - rightLen := len(*rightRunes) - if leftLen&0x1 != 0 || rightLen&0x1 != 0 { - panic("mergeRuneSets odd length []rune") - } - var ( - lx, rx int - ) - merged := make([]rune, 0) - next := make([]uint32, 0) - ok := true - defer func() { - if !ok { - merged = nil - next = nil - } - }() - - ix := -1 - extend := func(newLow *int, newArray *[]rune, pc uint32) bool { - if ix > 0 && (*newArray)[*newLow] <= merged[ix] { - return false - } - merged = append(merged, (*newArray)[*newLow], (*newArray)[*newLow+1]) - *newLow += 2 - ix += 2 - next = append(next, pc) - return true - } - - for lx < leftLen || rx < rightLen { - switch { - case rx >= rightLen: - ok = extend(&lx, leftRunes, leftPC) - case lx >= leftLen: - ok = extend(&rx, rightRunes, rightPC) - case (*rightRunes)[rx] < (*leftRunes)[lx]: - ok = extend(&rx, rightRunes, rightPC) - default: - ok = extend(&lx, leftRunes, leftPC) - } - if !ok { - return noRune, noNext - } - } - return merged, next -} - -// cleanupOnePass drops working memory, and restores certain shortcut instructions. -func cleanupOnePass(prog *onePassProg, original *syntax.Prog) { - for ix, instOriginal := range original.Inst { - switch instOriginal.Op { - case syntax.InstAlt, syntax.InstAltMatch, syntax.InstRune: - case syntax.InstCapture, syntax.InstEmptyWidth, syntax.InstNop, syntax.InstMatch, syntax.InstFail: - prog.Inst[ix].Next = nil - case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL: - prog.Inst[ix].Next = nil - prog.Inst[ix] = onePassInst{Inst: instOriginal} - } - } -} - -// onePassCopy creates a copy of the original Prog, as we'll be modifying it. -func onePassCopy(prog *syntax.Prog) *onePassProg { - p := &onePassProg{ - Start: prog.Start, - NumCap: prog.NumCap, - Inst: make([]onePassInst, len(prog.Inst)), - } - for i, inst := range prog.Inst { - p.Inst[i] = onePassInst{Inst: inst} - } - - // rewrites one or more common Prog constructs that enable some otherwise - // non-onepass Progs to be onepass. A:BD (for example) means an InstAlt at - // ip A, that points to ips B & C. - // A:BC + B:DA => A:BC + B:CD - // A:BC + B:DC => A:DC + B:DC - for pc := range p.Inst { - switch p.Inst[pc].Op { - default: - continue - case syntax.InstAlt, syntax.InstAltMatch: - // A:Bx + B:Ay - p_A_Other := &p.Inst[pc].Out - p_A_Alt := &p.Inst[pc].Arg - // make sure a target is another Alt - instAlt := p.Inst[*p_A_Alt] - if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) { - p_A_Alt, p_A_Other = p_A_Other, p_A_Alt - instAlt = p.Inst[*p_A_Alt] - if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) { - continue - } - } - instOther := p.Inst[*p_A_Other] - // Analyzing both legs pointing to Alts is for another day - if instOther.Op == syntax.InstAlt || instOther.Op == syntax.InstAltMatch { - // too complicated - continue - } - // simple empty transition loop - // A:BC + B:DA => A:BC + B:DC - p_B_Alt := &p.Inst[*p_A_Alt].Out - p_B_Other := &p.Inst[*p_A_Alt].Arg - patch := false - if instAlt.Out == uint32(pc) { - patch = true - } else if instAlt.Arg == uint32(pc) { - patch = true - p_B_Alt, p_B_Other = p_B_Other, p_B_Alt - } - if patch { - *p_B_Alt = *p_A_Other - } - - // empty transition to common target - // A:BC + B:DC => A:DC + B:DC - if *p_A_Other == *p_B_Alt { - *p_A_Alt = *p_B_Other - } - } - } - return p -} - -var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune} -var anyRune = []rune{0, unicode.MaxRune} - -// makeOnePass creates a onepass Prog, if possible. It is possible if at any alt, -// the match engine can always tell which branch to take. The routine may modify -// p if it is turned into a onepass Prog. If it isn't possible for this to be a -// onepass Prog, the Prog nil is returned. makeOnePass is recursive -// to the size of the Prog. -func makeOnePass(p *onePassProg) *onePassProg { - // If the machine is very long, it's not worth the time to check if we can use one pass. - if len(p.Inst) >= 1000 { - return nil - } - - var ( - instQueue = newQueue(len(p.Inst)) - visitQueue = newQueue(len(p.Inst)) - check func(uint32, []bool) bool - onePassRunes = make([][]rune, len(p.Inst)) - ) - - // check that paths from Alt instructions are unambiguous, and rebuild the new - // program as a onepass program - check = func(pc uint32, m []bool) (ok bool) { - ok = true - inst := &p.Inst[pc] - if visitQueue.contains(pc) { - return - } - visitQueue.insert(pc) - switch inst.Op { - case syntax.InstAlt, syntax.InstAltMatch: - ok = check(inst.Out, m) && check(inst.Arg, m) - // check no-input paths to InstMatch - matchOut := m[inst.Out] - matchArg := m[inst.Arg] - if matchOut && matchArg { - ok = false - break - } - // Match on empty goes in inst.Out - if matchArg { - inst.Out, inst.Arg = inst.Arg, inst.Out - matchOut, matchArg = matchArg, matchOut - } - if matchOut { - m[pc] = true - inst.Op = syntax.InstAltMatch - } - - // build a dispatch operator from the two legs of the alt. - onePassRunes[pc], inst.Next = mergeRuneSets( - &onePassRunes[inst.Out], &onePassRunes[inst.Arg], inst.Out, inst.Arg) - if len(inst.Next) > 0 && inst.Next[0] == mergeFailed { - ok = false - break - } - case syntax.InstCapture, syntax.InstNop: - ok = check(inst.Out, m) - m[pc] = m[inst.Out] - // pass matching runes back through these no-ops. - onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...) - inst.Next = make([]uint32, len(onePassRunes[pc])/2+1) - for i := range inst.Next { - inst.Next[i] = inst.Out - } - case syntax.InstEmptyWidth: - ok = check(inst.Out, m) - m[pc] = m[inst.Out] - onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...) - inst.Next = make([]uint32, len(onePassRunes[pc])/2+1) - for i := range inst.Next { - inst.Next[i] = inst.Out - } - case syntax.InstMatch, syntax.InstFail: - m[pc] = inst.Op == syntax.InstMatch - case syntax.InstRune: - m[pc] = false - if len(inst.Next) > 0 { - break - } - instQueue.insert(inst.Out) - if len(inst.Rune) == 0 { - onePassRunes[pc] = []rune{} - inst.Next = []uint32{inst.Out} - break - } - runes := make([]rune, 0) - if len(inst.Rune) == 1 && syntax.Flags(inst.Arg)&syntax.FoldCase != 0 { - r0 := inst.Rune[0] - runes = append(runes, r0, r0) - for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) { - runes = append(runes, r1, r1) - } - slices.Sort(runes) - } else { - runes = append(runes, inst.Rune...) - } - onePassRunes[pc] = runes - inst.Next = make([]uint32, len(onePassRunes[pc])/2+1) - for i := range inst.Next { - inst.Next[i] = inst.Out - } - inst.Op = syntax.InstRune - case syntax.InstRune1: - m[pc] = false - if len(inst.Next) > 0 { - break - } - instQueue.insert(inst.Out) - runes := []rune{} - // expand case-folded runes - if syntax.Flags(inst.Arg)&syntax.FoldCase != 0 { - r0 := inst.Rune[0] - runes = append(runes, r0, r0) - for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) { - runes = append(runes, r1, r1) - } - slices.Sort(runes) - } else { - runes = append(runes, inst.Rune[0], inst.Rune[0]) - } - onePassRunes[pc] = runes - inst.Next = make([]uint32, len(onePassRunes[pc])/2+1) - for i := range inst.Next { - inst.Next[i] = inst.Out - } - inst.Op = syntax.InstRune - case syntax.InstRuneAny: - m[pc] = false - if len(inst.Next) > 0 { - break - } - instQueue.insert(inst.Out) - onePassRunes[pc] = append([]rune{}, anyRune...) - inst.Next = []uint32{inst.Out} - case syntax.InstRuneAnyNotNL: - m[pc] = false - if len(inst.Next) > 0 { - break - } - instQueue.insert(inst.Out) - onePassRunes[pc] = append([]rune{}, anyRuneNotNL...) - inst.Next = make([]uint32, len(onePassRunes[pc])/2+1) - for i := range inst.Next { - inst.Next[i] = inst.Out - } - } - return - } - - instQueue.clear() - instQueue.insert(uint32(p.Start)) - m := make([]bool, len(p.Inst)) - for !instQueue.empty() { - visitQueue.clear() - pc := instQueue.next() - if !check(pc, m) { - p = nil - break - } - } - if p != nil { - for i := range p.Inst { - p.Inst[i].Rune = onePassRunes[i] - } - } - return p -} - -// compileOnePass returns a new *syntax.Prog suitable for onePass execution if the original Prog -// can be recharacterized as a one-pass regexp program, or syntax.nil if the -// Prog cannot be converted. For a one pass prog, the fundamental condition that must -// be true is: at any InstAlt, there must be no ambiguity about what branch to take. -func compileOnePass(prog *syntax.Prog) (p *onePassProg) { - if prog.Start == 0 { - return nil - } - // onepass regexp is anchored - if prog.Inst[prog.Start].Op != syntax.InstEmptyWidth || - syntax.EmptyOp(prog.Inst[prog.Start].Arg)&syntax.EmptyBeginText != syntax.EmptyBeginText { - return nil - } - // every instruction leading to InstMatch must be EmptyEndText - for _, inst := range prog.Inst { - opOut := prog.Inst[inst.Out].Op - switch inst.Op { - default: - if opOut == syntax.InstMatch { - return nil - } - case syntax.InstAlt, syntax.InstAltMatch: - if opOut == syntax.InstMatch || prog.Inst[inst.Arg].Op == syntax.InstMatch { - return nil - } - case syntax.InstEmptyWidth: - if opOut == syntax.InstMatch { - if syntax.EmptyOp(inst.Arg)&syntax.EmptyEndText == syntax.EmptyEndText { - continue - } - return nil - } - } - } - // Creates a slightly optimized copy of the original Prog - // that cleans up some Prog idioms that block valid onepass programs - p = onePassCopy(prog) - - // checkAmbiguity on InstAlts, build onepass Prog if possible - p = makeOnePass(p) - - if p != nil { - cleanupOnePass(p, prog) - } - return p -} diff --git a/vendor/github.com/grafana/regexp/regexp.go b/vendor/github.com/grafana/regexp/regexp.go deleted file mode 100644 index d1218ad0e..000000000 --- a/vendor/github.com/grafana/regexp/regexp.go +++ /dev/null @@ -1,1304 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package regexp implements regular expression search. -// -// The syntax of the regular expressions accepted is the same -// general syntax used by Perl, Python, and other languages. -// More precisely, it is the syntax accepted by RE2 and described at -// https://golang.org/s/re2syntax, except for \C. -// For an overview of the syntax, see the [regexp/syntax] package. -// -// The regexp implementation provided by this package is -// guaranteed to run in time linear in the size of the input. -// (This is a property not guaranteed by most open source -// implementations of regular expressions.) For more information -// about this property, see -// -// https://swtch.com/~rsc/regexp/regexp1.html -// -// or any book about automata theory. -// -// All characters are UTF-8-encoded code points. -// Following [utf8.DecodeRune], each byte of an invalid UTF-8 sequence -// is treated as if it encoded utf8.RuneError (U+FFFD). -// -// There are 16 methods of [Regexp] that match a regular expression and identify -// the matched text. Their names are matched by this regular expression: -// -// Find(All)?(String)?(Submatch)?(Index)? -// -// If 'All' is present, the routine matches successive non-overlapping -// matches of the entire expression. Empty matches abutting a preceding -// match are ignored. The return value is a slice containing the successive -// return values of the corresponding non-'All' routine. These routines take -// an extra integer argument, n. If n >= 0, the function returns at most n -// matches/submatches; otherwise, it returns all of them. -// -// If 'String' is present, the argument is a string; otherwise it is a slice -// of bytes; return values are adjusted as appropriate. -// -// If 'Submatch' is present, the return value is a slice identifying the -// successive submatches of the expression. Submatches are matches of -// parenthesized subexpressions (also known as capturing groups) within the -// regular expression, numbered from left to right in order of opening -// parenthesis. Submatch 0 is the match of the entire expression, submatch 1 is -// the match of the first parenthesized subexpression, and so on. -// -// If 'Index' is present, matches and submatches are identified by byte index -// pairs within the input string: result[2*n:2*n+2] identifies the indexes of -// the nth submatch. The pair for n==0 identifies the match of the entire -// expression. If 'Index' is not present, the match is identified by the text -// of the match/submatch. If an index is negative or text is nil, it means that -// subexpression did not match any string in the input. For 'String' versions -// an empty string means either no match or an empty match. -// -// There is also a subset of the methods that can be applied to text read -// from a RuneReader: -// -// MatchReader, FindReaderIndex, FindReaderSubmatchIndex -// -// This set may grow. Note that regular expression matches may need to -// examine text beyond the text returned by a match, so the methods that -// match text from a RuneReader may read arbitrarily far into the input -// before returning. -// -// (There are a few other methods that do not match this pattern.) -package regexp - -import ( - "bytes" - "io" - "regexp/syntax" - "strconv" - "strings" - "sync" - "unicode" - "unicode/utf8" -) - -// Regexp is the representation of a compiled regular expression. -// A Regexp is safe for concurrent use by multiple goroutines, -// except for configuration methods, such as [Regexp.Longest]. -type Regexp struct { - expr string // as passed to Compile - prog *syntax.Prog // compiled program - onepass *onePassProg // onepass program or nil - numSubexp int - maxBitStateLen int - subexpNames []string - prefix string // required prefix in unanchored matches - prefixBytes []byte // prefix, as a []byte - prefixRune rune // first rune in prefix - prefixEnd uint32 // pc for last rune in prefix - mpool int // pool for machines - matchcap int // size of recorded match lengths - prefixComplete bool // prefix is the entire regexp - cond syntax.EmptyOp // empty-width conditions required at start of match - minInputLen int // minimum length of the input in bytes - - // This field can be modified by the Longest method, - // but it is otherwise read-only. - longest bool // whether regexp prefers leftmost-longest match -} - -// String returns the source text used to compile the regular expression. -func (re *Regexp) String() string { - return re.expr -} - -// Copy returns a new [Regexp] object copied from re. -// Calling [Regexp.Longest] on one copy does not affect another. -// -// Deprecated: In earlier releases, when using a [Regexp] in multiple goroutines, -// giving each goroutine its own copy helped to avoid lock contention. -// As of Go 1.12, using Copy is no longer necessary to avoid lock contention. -// Copy may still be appropriate if the reason for its use is to make -// two copies with different [Regexp.Longest] settings. -func (re *Regexp) Copy() *Regexp { - re2 := *re - return &re2 -} - -// Compile parses a regular expression and returns, if successful, -// a [Regexp] object that can be used to match against text. -// -// When matching against text, the regexp returns a match that -// begins as early as possible in the input (leftmost), and among those -// it chooses the one that a backtracking search would have found first. -// This so-called leftmost-first matching is the same semantics -// that Perl, Python, and other implementations use, although this -// package implements it without the expense of backtracking. -// For POSIX leftmost-longest matching, see [CompilePOSIX]. -func Compile(expr string) (*Regexp, error) { - return compile(expr, syntax.Perl, false) -} - -// CompilePOSIX is like [Compile] but restricts the regular expression -// to POSIX ERE (egrep) syntax and changes the match semantics to -// leftmost-longest. -// -// That is, when matching against text, the regexp returns a match that -// begins as early as possible in the input (leftmost), and among those -// it chooses a match that is as long as possible. -// This so-called leftmost-longest matching is the same semantics -// that early regular expression implementations used and that POSIX -// specifies. -// -// However, there can be multiple leftmost-longest matches, with different -// submatch choices, and here this package diverges from POSIX. -// Among the possible leftmost-longest matches, this package chooses -// the one that a backtracking search would have found first, while POSIX -// specifies that the match be chosen to maximize the length of the first -// subexpression, then the second, and so on from left to right. -// The POSIX rule is computationally prohibitive and not even well-defined. -// See https://swtch.com/~rsc/regexp/regexp2.html#posix for details. -func CompilePOSIX(expr string) (*Regexp, error) { - return compile(expr, syntax.POSIX, true) -} - -// Longest makes future searches prefer the leftmost-longest match. -// That is, when matching against text, the regexp returns a match that -// begins as early as possible in the input (leftmost), and among those -// it chooses a match that is as long as possible. -// This method modifies the [Regexp] and may not be called concurrently -// with any other methods. -func (re *Regexp) Longest() { - re.longest = true -} - -func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) { - re, err := syntax.Parse(expr, mode) - if err != nil { - return nil, err - } - maxCap := re.MaxCap() - capNames := re.CapNames() - - re = re.Simplify() - prog, err := syntax.Compile(re) - if err != nil { - return nil, err - } - matchcap := prog.NumCap - if matchcap < 2 { - matchcap = 2 - } - regexp := &Regexp{ - expr: expr, - prog: prog, - onepass: compileOnePass(prog), - numSubexp: maxCap, - subexpNames: capNames, - cond: prog.StartCond(), - longest: longest, - matchcap: matchcap, - minInputLen: minInputLen(re), - } - if regexp.onepass == nil { - regexp.prefix, regexp.prefixComplete = prog.Prefix() - regexp.maxBitStateLen = maxBitStateLen(prog) - } else { - regexp.prefix, regexp.prefixComplete, regexp.prefixEnd = onePassPrefix(prog) - } - if regexp.prefix != "" { - // TODO(rsc): Remove this allocation by adding - // IndexString to package bytes. - regexp.prefixBytes = []byte(regexp.prefix) - regexp.prefixRune, _ = utf8.DecodeRuneInString(regexp.prefix) - } - - n := len(prog.Inst) - i := 0 - for matchSize[i] != 0 && matchSize[i] < n { - i++ - } - regexp.mpool = i - - return regexp, nil -} - -// Pools of *machine for use during (*Regexp).doExecute, -// split up by the size of the execution queues. -// matchPool[i] machines have queue size matchSize[i]. -// On a 64-bit system each queue entry is 16 bytes, -// so matchPool[0] has 16*2*128 = 4kB queues, etc. -// The final matchPool is a catch-all for very large queues. -var ( - matchSize = [...]int{128, 512, 2048, 16384, 0} - matchPool [len(matchSize)]sync.Pool -) - -// get returns a machine to use for matching re. -// It uses the re's machine cache if possible, to avoid -// unnecessary allocation. -func (re *Regexp) get() *machine { - m, ok := matchPool[re.mpool].Get().(*machine) - if !ok { - m = new(machine) - } - m.re = re - m.p = re.prog - if cap(m.matchcap) < re.matchcap { - m.matchcap = make([]int, re.matchcap) - for _, t := range m.pool { - t.cap = make([]int, re.matchcap) - } - } - - // Allocate queues if needed. - // Or reallocate, for "large" match pool. - n := matchSize[re.mpool] - if n == 0 { // large pool - n = len(re.prog.Inst) - } - if len(m.q0.sparse) < n { - m.q0 = queue{make([]uint32, n), make([]entry, 0, n)} - m.q1 = queue{make([]uint32, n), make([]entry, 0, n)} - } - return m -} - -// put returns a machine to the correct machine pool. -func (re *Regexp) put(m *machine) { - m.re = nil - m.p = nil - m.inputs.clear() - matchPool[re.mpool].Put(m) -} - -// minInputLen walks the regexp to find the minimum length of any matchable input. -func minInputLen(re *syntax.Regexp) int { - switch re.Op { - default: - return 0 - case syntax.OpAnyChar, syntax.OpAnyCharNotNL, syntax.OpCharClass: - return 1 - case syntax.OpLiteral: - l := 0 - for _, r := range re.Rune { - if r == utf8.RuneError { - l++ - } else { - l += utf8.RuneLen(r) - } - } - return l - case syntax.OpCapture, syntax.OpPlus: - return minInputLen(re.Sub[0]) - case syntax.OpRepeat: - return re.Min * minInputLen(re.Sub[0]) - case syntax.OpConcat: - l := 0 - for _, sub := range re.Sub { - l += minInputLen(sub) - } - return l - case syntax.OpAlternate: - l := minInputLen(re.Sub[0]) - var lnext int - for _, sub := range re.Sub[1:] { - lnext = minInputLen(sub) - if lnext < l { - l = lnext - } - } - return l - } -} - -// MustCompile is like [Compile] but panics if the expression cannot be parsed. -// It simplifies safe initialization of global variables holding compiled regular -// expressions. -func MustCompile(str string) *Regexp { - regexp, err := Compile(str) - if err != nil { - panic(`regexp: Compile(` + quote(str) + `): ` + err.Error()) - } - return regexp -} - -// MustCompilePOSIX is like [CompilePOSIX] but panics if the expression cannot be parsed. -// It simplifies safe initialization of global variables holding compiled regular -// expressions. -func MustCompilePOSIX(str string) *Regexp { - regexp, err := CompilePOSIX(str) - if err != nil { - panic(`regexp: CompilePOSIX(` + quote(str) + `): ` + err.Error()) - } - return regexp -} - -func quote(s string) string { - if strconv.CanBackquote(s) { - return "`" + s + "`" - } - return strconv.Quote(s) -} - -// NumSubexp returns the number of parenthesized subexpressions in this [Regexp]. -func (re *Regexp) NumSubexp() int { - return re.numSubexp -} - -// SubexpNames returns the names of the parenthesized subexpressions -// in this [Regexp]. The name for the first sub-expression is names[1], -// so that if m is a match slice, the name for m[i] is SubexpNames()[i]. -// Since the Regexp as a whole cannot be named, names[0] is always -// the empty string. The slice should not be modified. -func (re *Regexp) SubexpNames() []string { - return re.subexpNames -} - -// SubexpIndex returns the index of the first subexpression with the given name, -// or -1 if there is no subexpression with that name. -// -// Note that multiple subexpressions can be written using the same name, as in -// (?P<bob>a+)(?P<bob>b+), which declares two subexpressions named "bob". -// In this case, SubexpIndex returns the index of the leftmost such subexpression -// in the regular expression. -func (re *Regexp) SubexpIndex(name string) int { - if name != "" { - for i, s := range re.subexpNames { - if name == s { - return i - } - } - } - return -1 -} - -const endOfText rune = -1 - -// input abstracts different representations of the input text. It provides -// one-character lookahead. -type input interface { - step(pos int) (r rune, width int) // advance one rune - canCheckPrefix() bool // can we look ahead without losing info? - hasPrefix(re *Regexp) bool - index(re *Regexp, pos int) int - context(pos int) lazyFlag -} - -// inputString scans a string. -type inputString struct { - str string -} - -func (i *inputString) step(pos int) (rune, int) { - if pos < len(i.str) { - c := i.str[pos] - if c < utf8.RuneSelf { - return rune(c), 1 - } - return utf8.DecodeRuneInString(i.str[pos:]) - } - return endOfText, 0 -} - -func (i *inputString) canCheckPrefix() bool { - return true -} - -func (i *inputString) hasPrefix(re *Regexp) bool { - return strings.HasPrefix(i.str, re.prefix) -} - -func (i *inputString) index(re *Regexp, pos int) int { - return strings.Index(i.str[pos:], re.prefix) -} - -func (i *inputString) context(pos int) lazyFlag { - r1, r2 := endOfText, endOfText - // 0 < pos && pos <= len(i.str) - if uint(pos-1) < uint(len(i.str)) { - r1 = rune(i.str[pos-1]) - if r1 >= utf8.RuneSelf { - r1, _ = utf8.DecodeLastRuneInString(i.str[:pos]) - } - } - // 0 <= pos && pos < len(i.str) - if uint(pos) < uint(len(i.str)) { - r2 = rune(i.str[pos]) - if r2 >= utf8.RuneSelf { - r2, _ = utf8.DecodeRuneInString(i.str[pos:]) - } - } - return newLazyFlag(r1, r2) -} - -// inputBytes scans a byte slice. -type inputBytes struct { - str []byte -} - -func (i *inputBytes) step(pos int) (rune, int) { - if pos < len(i.str) { - c := i.str[pos] - if c < utf8.RuneSelf { - return rune(c), 1 - } - return utf8.DecodeRune(i.str[pos:]) - } - return endOfText, 0 -} - -func (i *inputBytes) canCheckPrefix() bool { - return true -} - -func (i *inputBytes) hasPrefix(re *Regexp) bool { - return bytes.HasPrefix(i.str, re.prefixBytes) -} - -func (i *inputBytes) index(re *Regexp, pos int) int { - return bytes.Index(i.str[pos:], re.prefixBytes) -} - -func (i *inputBytes) context(pos int) lazyFlag { - r1, r2 := endOfText, endOfText - // 0 < pos && pos <= len(i.str) - if uint(pos-1) < uint(len(i.str)) { - r1 = rune(i.str[pos-1]) - if r1 >= utf8.RuneSelf { - r1, _ = utf8.DecodeLastRune(i.str[:pos]) - } - } - // 0 <= pos && pos < len(i.str) - if uint(pos) < uint(len(i.str)) { - r2 = rune(i.str[pos]) - if r2 >= utf8.RuneSelf { - r2, _ = utf8.DecodeRune(i.str[pos:]) - } - } - return newLazyFlag(r1, r2) -} - -// inputReader scans a RuneReader. -type inputReader struct { - r io.RuneReader - atEOT bool - pos int -} - -func (i *inputReader) step(pos int) (rune, int) { - if !i.atEOT && pos != i.pos { - return endOfText, 0 - - } - r, w, err := i.r.ReadRune() - if err != nil { - i.atEOT = true - return endOfText, 0 - } - i.pos += w - return r, w -} - -func (i *inputReader) canCheckPrefix() bool { - return false -} - -func (i *inputReader) hasPrefix(re *Regexp) bool { - return false -} - -func (i *inputReader) index(re *Regexp, pos int) int { - return -1 -} - -func (i *inputReader) context(pos int) lazyFlag { - return 0 // not used -} - -// LiteralPrefix returns a literal string that must begin any match -// of the regular expression re. It returns the boolean true if the -// literal string comprises the entire regular expression. -func (re *Regexp) LiteralPrefix() (prefix string, complete bool) { - return re.prefix, re.prefixComplete -} - -// MatchReader reports whether the text returned by the [io.RuneReader] -// contains any match of the regular expression re. -func (re *Regexp) MatchReader(r io.RuneReader) bool { - return re.doMatch(r, nil, "") -} - -// MatchString reports whether the string s -// contains any match of the regular expression re. -func (re *Regexp) MatchString(s string) bool { - return re.doMatch(nil, nil, s) -} - -// Match reports whether the byte slice b -// contains any match of the regular expression re. -func (re *Regexp) Match(b []byte) bool { - return re.doMatch(nil, b, "") -} - -// MatchReader reports whether the text returned by the RuneReader -// contains any match of the regular expression pattern. -// More complicated queries need to use [Compile] and the full [Regexp] interface. -func MatchReader(pattern string, r io.RuneReader) (matched bool, err error) { - re, err := Compile(pattern) - if err != nil { - return false, err - } - return re.MatchReader(r), nil -} - -// MatchString reports whether the string s -// contains any match of the regular expression pattern. -// More complicated queries need to use [Compile] and the full [Regexp] interface. -func MatchString(pattern string, s string) (matched bool, err error) { - re, err := Compile(pattern) - if err != nil { - return false, err - } - return re.MatchString(s), nil -} - -// Match reports whether the byte slice b -// contains any match of the regular expression pattern. -// More complicated queries need to use [Compile] and the full [Regexp] interface. -func Match(pattern string, b []byte) (matched bool, err error) { - re, err := Compile(pattern) - if err != nil { - return false, err - } - return re.Match(b), nil -} - -// ReplaceAllString returns a copy of src, replacing matches of the [Regexp] -// with the replacement string repl. -// Inside repl, $ signs are interpreted as in [Regexp.Expand]. -func (re *Regexp) ReplaceAllString(src, repl string) string { - n := 2 - if strings.Contains(repl, "$") { - n = 2 * (re.numSubexp + 1) - } - b := re.replaceAll(nil, src, n, func(dst []byte, match []int) []byte { - return re.expand(dst, repl, nil, src, match) - }) - return string(b) -} - -// ReplaceAllLiteralString returns a copy of src, replacing matches of the [Regexp] -// with the replacement string repl. The replacement repl is substituted directly, -// without using [Regexp.Expand]. -func (re *Regexp) ReplaceAllLiteralString(src, repl string) string { - return string(re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte { - return append(dst, repl...) - })) -} - -// ReplaceAllStringFunc returns a copy of src in which all matches of the -// [Regexp] have been replaced by the return value of function repl applied -// to the matched substring. The replacement returned by repl is substituted -// directly, without using [Regexp.Expand]. -func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string { - b := re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte { - return append(dst, repl(src[match[0]:match[1]])...) - }) - return string(b) -} - -func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst []byte, m []int) []byte) []byte { - lastMatchEnd := 0 // end position of the most recent match - searchPos := 0 // position where we next look for a match - var buf []byte - var endPos int - if bsrc != nil { - endPos = len(bsrc) - } else { - endPos = len(src) - } - if nmatch > re.prog.NumCap { - nmatch = re.prog.NumCap - } - - var dstCap [2]int - for searchPos <= endPos { - a := re.doExecute(nil, bsrc, src, searchPos, nmatch, dstCap[:0]) - if len(a) == 0 { - break // no more matches - } - - // Copy the unmatched characters before this match. - if bsrc != nil { - buf = append(buf, bsrc[lastMatchEnd:a[0]]...) - } else { - buf = append(buf, src[lastMatchEnd:a[0]]...) - } - - // Now insert a copy of the replacement string, but not for a - // match of the empty string immediately after another match. - // (Otherwise, we get double replacement for patterns that - // match both empty and nonempty strings.) - if a[1] > lastMatchEnd || a[0] == 0 { - buf = repl(buf, a) - } - lastMatchEnd = a[1] - - // Advance past this match; always advance at least one character. - var width int - if bsrc != nil { - _, width = utf8.DecodeRune(bsrc[searchPos:]) - } else { - _, width = utf8.DecodeRuneInString(src[searchPos:]) - } - if searchPos+width > a[1] { - searchPos += width - } else if searchPos+1 > a[1] { - // This clause is only needed at the end of the input - // string. In that case, DecodeRuneInString returns width=0. - searchPos++ - } else { - searchPos = a[1] - } - } - - // Copy the unmatched characters after the last match. - if bsrc != nil { - buf = append(buf, bsrc[lastMatchEnd:]...) - } else { - buf = append(buf, src[lastMatchEnd:]...) - } - - return buf -} - -// ReplaceAll returns a copy of src, replacing matches of the [Regexp] -// with the replacement text repl. -// Inside repl, $ signs are interpreted as in [Regexp.Expand]. -func (re *Regexp) ReplaceAll(src, repl []byte) []byte { - n := 2 - if bytes.IndexByte(repl, '$') >= 0 { - n = 2 * (re.numSubexp + 1) - } - srepl := "" - b := re.replaceAll(src, "", n, func(dst []byte, match []int) []byte { - if len(srepl) != len(repl) { - srepl = string(repl) - } - return re.expand(dst, srepl, src, "", match) - }) - return b -} - -// ReplaceAllLiteral returns a copy of src, replacing matches of the [Regexp] -// with the replacement bytes repl. The replacement repl is substituted directly, -// without using [Regexp.Expand]. -func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte { - return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte { - return append(dst, repl...) - }) -} - -// ReplaceAllFunc returns a copy of src in which all matches of the -// [Regexp] have been replaced by the return value of function repl applied -// to the matched byte slice. The replacement returned by repl is substituted -// directly, without using [Regexp.Expand]. -func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte { - return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte { - return append(dst, repl(src[match[0]:match[1]])...) - }) -} - -// Bitmap used by func special to check whether a character needs to be escaped. -var specialBytes [16]byte - -// special reports whether byte b needs to be escaped by QuoteMeta. -func special(b byte) bool { - return b < utf8.RuneSelf && specialBytes[b%16]&(1<<(b/16)) != 0 -} - -func init() { - for _, b := range []byte(`\.+*?()|[]{}^$`) { - specialBytes[b%16] |= 1 << (b / 16) - } -} - -// QuoteMeta returns a string that escapes all regular expression metacharacters -// inside the argument text; the returned string is a regular expression matching -// the literal text. -func QuoteMeta(s string) string { - // A byte loop is correct because all metacharacters are ASCII. - var i int - for i = 0; i < len(s); i++ { - if special(s[i]) { - break - } - } - // No meta characters found, so return original string. - if i >= len(s) { - return s - } - - b := make([]byte, 2*len(s)-i) - copy(b, s[:i]) - j := i - for ; i < len(s); i++ { - if special(s[i]) { - b[j] = '\\' - j++ - } - b[j] = s[i] - j++ - } - return string(b[:j]) -} - -// The number of capture values in the program may correspond -// to fewer capturing expressions than are in the regexp. -// For example, "(a){0}" turns into an empty program, so the -// maximum capture in the program is 0 but we need to return -// an expression for \1. Pad appends -1s to the slice a as needed. -func (re *Regexp) pad(a []int) []int { - if a == nil { - // No match. - return nil - } - n := (1 + re.numSubexp) * 2 - for len(a) < n { - a = append(a, -1) - } - return a -} - -// allMatches calls deliver at most n times -// with the location of successive matches in the input text. -// The input text is b if non-nil, otherwise s. -func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) { - var end int - if b == nil { - end = len(s) - } else { - end = len(b) - } - - for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; { - matches := re.doExecute(nil, b, s, pos, re.prog.NumCap, nil) - if len(matches) == 0 { - break - } - - accept := true - if matches[1] == pos { - // We've found an empty match. - if matches[0] == prevMatchEnd { - // We don't allow an empty match right - // after a previous match, so ignore it. - accept = false - } - var width int - if b == nil { - is := inputString{str: s} - _, width = is.step(pos) - } else { - ib := inputBytes{str: b} - _, width = ib.step(pos) - } - if width > 0 { - pos += width - } else { - pos = end + 1 - } - } else { - pos = matches[1] - } - prevMatchEnd = matches[1] - - if accept { - deliver(re.pad(matches)) - i++ - } - } -} - -// Find returns a slice holding the text of the leftmost match in b of the regular expression. -// A return value of nil indicates no match. -func (re *Regexp) Find(b []byte) []byte { - var dstCap [2]int - a := re.doExecute(nil, b, "", 0, 2, dstCap[:0]) - if a == nil { - return nil - } - return b[a[0]:a[1]:a[1]] -} - -// FindIndex returns a two-element slice of integers defining the location of -// the leftmost match in b of the regular expression. The match itself is at -// b[loc[0]:loc[1]]. -// A return value of nil indicates no match. -func (re *Regexp) FindIndex(b []byte) (loc []int) { - a := re.doExecute(nil, b, "", 0, 2, nil) - if a == nil { - return nil - } - return a[0:2] -} - -// FindString returns a string holding the text of the leftmost match in s of the regular -// expression. If there is no match, the return value is an empty string, -// but it will also be empty if the regular expression successfully matches -// an empty string. Use [Regexp.FindStringIndex] or [Regexp.FindStringSubmatch] if it is -// necessary to distinguish these cases. -func (re *Regexp) FindString(s string) string { - var dstCap [2]int - a := re.doExecute(nil, nil, s, 0, 2, dstCap[:0]) - if a == nil { - return "" - } - return s[a[0]:a[1]] -} - -// FindStringIndex returns a two-element slice of integers defining the -// location of the leftmost match in s of the regular expression. The match -// itself is at s[loc[0]:loc[1]]. -// A return value of nil indicates no match. -func (re *Regexp) FindStringIndex(s string) (loc []int) { - a := re.doExecute(nil, nil, s, 0, 2, nil) - if a == nil { - return nil - } - return a[0:2] -} - -// FindReaderIndex returns a two-element slice of integers defining the -// location of the leftmost match of the regular expression in text read from -// the [io.RuneReader]. The match text was found in the input stream at -// byte offset loc[0] through loc[1]-1. -// A return value of nil indicates no match. -func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) { - a := re.doExecute(r, nil, "", 0, 2, nil) - if a == nil { - return nil - } - return a[0:2] -} - -// FindSubmatch returns a slice of slices holding the text of the leftmost -// match of the regular expression in b and the matches, if any, of its -// subexpressions, as defined by the 'Submatch' descriptions in the package -// comment. -// A return value of nil indicates no match. -func (re *Regexp) FindSubmatch(b []byte) [][]byte { - var dstCap [4]int - a := re.doExecute(nil, b, "", 0, re.prog.NumCap, dstCap[:0]) - if a == nil { - return nil - } - ret := make([][]byte, 1+re.numSubexp) - for i := range ret { - if 2*i < len(a) && a[2*i] >= 0 { - ret[i] = b[a[2*i]:a[2*i+1]:a[2*i+1]] - } - } - return ret -} - -// Expand appends template to dst and returns the result; during the -// append, Expand replaces variables in the template with corresponding -// matches drawn from src. The match slice should have been returned by -// [Regexp.FindSubmatchIndex]. -// -// In the template, a variable is denoted by a substring of the form -// $name or ${name}, where name is a non-empty sequence of letters, -// digits, and underscores. A purely numeric name like $1 refers to -// the submatch with the corresponding index; other names refer to -// capturing parentheses named with the (?P<name>...) syntax. A -// reference to an out of range or unmatched index or a name that is not -// present in the regular expression is replaced with an empty slice. -// -// In the $name form, name is taken to be as long as possible: $1x is -// equivalent to ${1x}, not ${1}x, and, $10 is equivalent to ${10}, not ${1}0. -// -// To insert a literal $ in the output, use $$ in the template. -func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte { - return re.expand(dst, string(template), src, "", match) -} - -// ExpandString is like [Regexp.Expand] but the template and source are strings. -// It appends to and returns a byte slice in order to give the calling -// code control over allocation. -func (re *Regexp) ExpandString(dst []byte, template string, src string, match []int) []byte { - return re.expand(dst, template, nil, src, match) -} - -func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, match []int) []byte { - for len(template) > 0 { - before, after, ok := strings.Cut(template, "$") - if !ok { - break - } - dst = append(dst, before...) - template = after - if template != "" && template[0] == '$' { - // Treat $$ as $. - dst = append(dst, '$') - template = template[1:] - continue - } - name, num, rest, ok := extract(template) - if !ok { - // Malformed; treat $ as raw text. - dst = append(dst, '$') - continue - } - template = rest - if num >= 0 { - if 2*num+1 < len(match) && match[2*num] >= 0 { - if bsrc != nil { - dst = append(dst, bsrc[match[2*num]:match[2*num+1]]...) - } else { - dst = append(dst, src[match[2*num]:match[2*num+1]]...) - } - } - } else { - for i, namei := range re.subexpNames { - if name == namei && 2*i+1 < len(match) && match[2*i] >= 0 { - if bsrc != nil { - dst = append(dst, bsrc[match[2*i]:match[2*i+1]]...) - } else { - dst = append(dst, src[match[2*i]:match[2*i+1]]...) - } - break - } - } - } - } - dst = append(dst, template...) - return dst -} - -// extract returns the name from a leading "name" or "{name}" in str. -// (The $ has already been removed by the caller.) -// If it is a number, extract returns num set to that number; otherwise num = -1. -func extract(str string) (name string, num int, rest string, ok bool) { - if str == "" { - return - } - brace := false - if str[0] == '{' { - brace = true - str = str[1:] - } - i := 0 - for i < len(str) { - rune, size := utf8.DecodeRuneInString(str[i:]) - if !unicode.IsLetter(rune) && !unicode.IsDigit(rune) && rune != '_' { - break - } - i += size - } - if i == 0 { - // empty name is not okay - return - } - name = str[:i] - if brace { - if i >= len(str) || str[i] != '}' { - // missing closing brace - return - } - i++ - } - - // Parse number. - num = 0 - for i := 0; i < len(name); i++ { - if name[i] < '0' || '9' < name[i] || num >= 1e8 { - num = -1 - break - } - num = num*10 + int(name[i]) - '0' - } - // Disallow leading zeros. - if name[0] == '0' && len(name) > 1 { - num = -1 - } - - rest = str[i:] - ok = true - return -} - -// FindSubmatchIndex returns a slice holding the index pairs identifying the -// leftmost match of the regular expression in b and the matches, if any, of -// its subexpressions, as defined by the 'Submatch' and 'Index' descriptions -// in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindSubmatchIndex(b []byte) []int { - return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap, nil)) -} - -// FindStringSubmatch returns a slice of strings holding the text of the -// leftmost match of the regular expression in s and the matches, if any, of -// its subexpressions, as defined by the 'Submatch' description in the -// package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindStringSubmatch(s string) []string { - var dstCap [4]int - a := re.doExecute(nil, nil, s, 0, re.prog.NumCap, dstCap[:0]) - if a == nil { - return nil - } - ret := make([]string, 1+re.numSubexp) - for i := range ret { - if 2*i < len(a) && a[2*i] >= 0 { - ret[i] = s[a[2*i]:a[2*i+1]] - } - } - return ret -} - -// FindStringSubmatchIndex returns a slice holding the index pairs -// identifying the leftmost match of the regular expression in s and the -// matches, if any, of its subexpressions, as defined by the 'Submatch' and -// 'Index' descriptions in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindStringSubmatchIndex(s string) []int { - return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap, nil)) -} - -// FindReaderSubmatchIndex returns a slice holding the index pairs -// identifying the leftmost match of the regular expression of text read by -// the [io.RuneReader], and the matches, if any, of its subexpressions, as defined -// by the 'Submatch' and 'Index' descriptions in the package comment. A -// return value of nil indicates no match. -func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int { - return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap, nil)) -} - -const startSize = 10 // The size at which to start a slice in the 'All' routines. - -// FindAll is the 'All' version of [Regexp.Find]; it returns a slice of all successive -// matches of the expression, as defined by the 'All' description in the -// package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAll(b []byte, n int) [][]byte { - if n < 0 { - n = len(b) + 1 - } - var result [][]byte - re.allMatches("", b, n, func(match []int) { - if result == nil { - result = make([][]byte, 0, startSize) - } - result = append(result, b[match[0]:match[1]:match[1]]) - }) - return result -} - -// FindAllIndex is the 'All' version of [Regexp.FindIndex]; it returns a slice of all -// successive matches of the expression, as defined by the 'All' description -// in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAllIndex(b []byte, n int) [][]int { - if n < 0 { - n = len(b) + 1 - } - var result [][]int - re.allMatches("", b, n, func(match []int) { - if result == nil { - result = make([][]int, 0, startSize) - } - result = append(result, match[0:2]) - }) - return result -} - -// FindAllString is the 'All' version of [Regexp.FindString]; it returns a slice of all -// successive matches of the expression, as defined by the 'All' description -// in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAllString(s string, n int) []string { - if n < 0 { - n = len(s) + 1 - } - var result []string - re.allMatches(s, nil, n, func(match []int) { - if result == nil { - result = make([]string, 0, startSize) - } - result = append(result, s[match[0]:match[1]]) - }) - return result -} - -// FindAllStringIndex is the 'All' version of [Regexp.FindStringIndex]; it returns a -// slice of all successive matches of the expression, as defined by the 'All' -// description in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAllStringIndex(s string, n int) [][]int { - if n < 0 { - n = len(s) + 1 - } - var result [][]int - re.allMatches(s, nil, n, func(match []int) { - if result == nil { - result = make([][]int, 0, startSize) - } - result = append(result, match[0:2]) - }) - return result -} - -// FindAllSubmatch is the 'All' version of [Regexp.FindSubmatch]; it returns a slice -// of all successive matches of the expression, as defined by the 'All' -// description in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte { - if n < 0 { - n = len(b) + 1 - } - var result [][][]byte - re.allMatches("", b, n, func(match []int) { - if result == nil { - result = make([][][]byte, 0, startSize) - } - slice := make([][]byte, len(match)/2) - for j := range slice { - if match[2*j] >= 0 { - slice[j] = b[match[2*j]:match[2*j+1]:match[2*j+1]] - } - } - result = append(result, slice) - }) - return result -} - -// FindAllSubmatchIndex is the 'All' version of [Regexp.FindSubmatchIndex]; it returns -// a slice of all successive matches of the expression, as defined by the -// 'All' description in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int { - if n < 0 { - n = len(b) + 1 - } - var result [][]int - re.allMatches("", b, n, func(match []int) { - if result == nil { - result = make([][]int, 0, startSize) - } - result = append(result, match) - }) - return result -} - -// FindAllStringSubmatch is the 'All' version of [Regexp.FindStringSubmatch]; it -// returns a slice of all successive matches of the expression, as defined by -// the 'All' description in the package comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string { - if n < 0 { - n = len(s) + 1 - } - var result [][]string - re.allMatches(s, nil, n, func(match []int) { - if result == nil { - result = make([][]string, 0, startSize) - } - slice := make([]string, len(match)/2) - for j := range slice { - if match[2*j] >= 0 { - slice[j] = s[match[2*j]:match[2*j+1]] - } - } - result = append(result, slice) - }) - return result -} - -// FindAllStringSubmatchIndex is the 'All' version of -// [Regexp.FindStringSubmatchIndex]; it returns a slice of all successive matches of -// the expression, as defined by the 'All' description in the package -// comment. -// A return value of nil indicates no match. -func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int { - if n < 0 { - n = len(s) + 1 - } - var result [][]int - re.allMatches(s, nil, n, func(match []int) { - if result == nil { - result = make([][]int, 0, startSize) - } - result = append(result, match) - }) - return result -} - -// Split slices s into substrings separated by the expression and returns a slice of -// the substrings between those expression matches. -// -// The slice returned by this method consists of all the substrings of s -// not contained in the slice returned by [Regexp.FindAllString]. When called on an expression -// that contains no metacharacters, it is equivalent to [strings.SplitN]. -// -// Example: -// -// s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5) -// // s: ["", "b", "b", "c", "cadaaae"] -// -// The count determines the number of substrings to return: -// -// n > 0: at most n substrings; the last substring will be the unsplit remainder. -// n == 0: the result is nil (zero substrings) -// n < 0: all substrings -func (re *Regexp) Split(s string, n int) []string { - - if n == 0 { - return nil - } - - if len(re.expr) > 0 && len(s) == 0 { - return []string{""} - } - - matches := re.FindAllStringIndex(s, n) - strings := make([]string, 0, len(matches)) - - beg := 0 - end := 0 - for _, match := range matches { - if n > 0 && len(strings) >= n-1 { - break - } - - end = match[0] - if match[1] != 0 { - strings = append(strings, s[beg:end]) - } - beg = match[1] - } - - if end != len(s) { - strings = append(strings, s[beg:]) - } - - return strings -} - -// MarshalText implements [encoding.TextMarshaler]. The output -// matches that of calling the [Regexp.String] method. -// -// Note that the output is lossy in some cases: This method does not indicate -// POSIX regular expressions (i.e. those compiled by calling [CompilePOSIX]), or -// those for which the [Regexp.Longest] method has been called. -func (re *Regexp) MarshalText() ([]byte, error) { - return []byte(re.String()), nil -} - -// UnmarshalText implements [encoding.TextUnmarshaler] by calling -// [Compile] on the encoded value. -func (re *Regexp) UnmarshalText(text []byte) error { - newRE, err := Compile(string(text)) - if err != nil { - return err - } - *re = *newRE - return nil -} |
