diff options
Diffstat (limited to 'vendor/github.com/go-openapi/swag/split.go')
-rw-r--r-- | vendor/github.com/go-openapi/swag/split.go | 470 |
1 files changed, 358 insertions, 112 deletions
diff --git a/vendor/github.com/go-openapi/swag/split.go b/vendor/github.com/go-openapi/swag/split.go index a1825fb7d..274727a86 100644 --- a/vendor/github.com/go-openapi/swag/split.go +++ b/vendor/github.com/go-openapi/swag/split.go @@ -15,124 +15,269 @@ package swag import ( + "bytes" + "sync" "unicode" + "unicode/utf8" ) -var nameReplaceTable = map[rune]string{ - '@': "At ", - '&': "And ", - '|': "Pipe ", - '$': "Dollar ", - '!': "Bang ", - '-': "", - '_': "", -} - type ( splitter struct { - postSplitInitialismCheck bool initialisms []string + initialismsRunes [][]rune + initialismsUpperCased [][]rune // initialisms cached in their trimmed, upper-cased version + postSplitInitialismCheck bool + } + + splitterOption func(*splitter) + + initialismMatch struct { + body []rune + start, end int + complete bool + } + initialismMatches []initialismMatch +) + +type ( + // memory pools of temporary objects. + // + // These are used to recycle temporarily allocated objects + // and relieve the GC from undue pressure. + + matchesPool struct { + *sync.Pool } - splitterOption func(*splitter) *splitter + buffersPool struct { + *sync.Pool + } + + lexemsPool struct { + *sync.Pool + } + + splittersPool struct { + *sync.Pool + } ) -// split calls the splitter; splitter provides more control and post options +var ( + // poolOfMatches holds temporary slices for recycling during the initialism match process + poolOfMatches = matchesPool{ + Pool: &sync.Pool{ + New: func() any { + s := make(initialismMatches, 0, maxAllocMatches) + + return &s + }, + }, + } + + poolOfBuffers = buffersPool{ + Pool: &sync.Pool{ + New: func() any { + return new(bytes.Buffer) + }, + }, + } + + poolOfLexems = lexemsPool{ + Pool: &sync.Pool{ + New: func() any { + s := make([]nameLexem, 0, maxAllocMatches) + + return &s + }, + }, + } + + poolOfSplitters = splittersPool{ + Pool: &sync.Pool{ + New: func() any { + s := newSplitter() + + return &s + }, + }, + } +) + +// nameReplaceTable finds a word representation for special characters. +func nameReplaceTable(r rune) (string, bool) { + switch r { + case '@': + return "At ", true + case '&': + return "And ", true + case '|': + return "Pipe ", true + case '$': + return "Dollar ", true + case '!': + return "Bang ", true + case '-': + return "", true + case '_': + return "", true + default: + return "", false + } +} + +// split calls the splitter. +// +// Use newSplitter for more control and options func split(str string) []string { - lexems := newSplitter().split(str) - result := make([]string, 0, len(lexems)) + s := poolOfSplitters.BorrowSplitter() + lexems := s.split(str) + result := make([]string, 0, len(*lexems)) - for _, lexem := range lexems { + for _, lexem := range *lexems { result = append(result, lexem.GetOriginal()) } + poolOfLexems.RedeemLexems(lexems) + poolOfSplitters.RedeemSplitter(s) return result } -func (s *splitter) split(str string) []nameLexem { - return s.toNameLexems(str) -} - -func newSplitter(options ...splitterOption) *splitter { - splitter := &splitter{ +func newSplitter(options ...splitterOption) splitter { + s := splitter{ postSplitInitialismCheck: false, initialisms: initialisms, + initialismsRunes: initialismsRunes, + initialismsUpperCased: initialismsUpperCased, } for _, option := range options { - splitter = option(splitter) + option(&s) } - return splitter + return s } // withPostSplitInitialismCheck allows to catch initialisms after main split process -func withPostSplitInitialismCheck(s *splitter) *splitter { +func withPostSplitInitialismCheck(s *splitter) { s.postSplitInitialismCheck = true +} + +func (p matchesPool) BorrowMatches() *initialismMatches { + s := p.Get().(*initialismMatches) + *s = (*s)[:0] // reset slice, keep allocated capacity + return s } -type ( - initialismMatch struct { - start, end int - body []rune - complete bool +func (p buffersPool) BorrowBuffer(size int) *bytes.Buffer { + s := p.Get().(*bytes.Buffer) + s.Reset() + + if s.Cap() < size { + s.Grow(size) } - initialismMatches []*initialismMatch -) -func (s *splitter) toNameLexems(name string) []nameLexem { + return s +} + +func (p lexemsPool) BorrowLexems() *[]nameLexem { + s := p.Get().(*[]nameLexem) + *s = (*s)[:0] // reset slice, keep allocated capacity + + return s +} + +func (p splittersPool) BorrowSplitter(options ...splitterOption) *splitter { + s := p.Get().(*splitter) + s.postSplitInitialismCheck = false // reset options + for _, apply := range options { + apply(s) + } + + return s +} + +func (p matchesPool) RedeemMatches(s *initialismMatches) { + p.Put(s) +} + +func (p buffersPool) RedeemBuffer(s *bytes.Buffer) { + p.Put(s) +} + +func (p lexemsPool) RedeemLexems(s *[]nameLexem) { + p.Put(s) +} + +func (p splittersPool) RedeemSplitter(s *splitter) { + p.Put(s) +} + +func (m initialismMatch) isZero() bool { + return m.start == 0 && m.end == 0 +} + +func (s splitter) split(name string) *[]nameLexem { nameRunes := []rune(name) matches := s.gatherInitialismMatches(nameRunes) + if matches == nil { + return poolOfLexems.BorrowLexems() + } + return s.mapMatchesToNameLexems(nameRunes, matches) } -func (s *splitter) gatherInitialismMatches(nameRunes []rune) initialismMatches { - matches := make(initialismMatches, 0) +func (s splitter) gatherInitialismMatches(nameRunes []rune) *initialismMatches { + var matches *initialismMatches for currentRunePosition, currentRune := range nameRunes { - newMatches := make(initialismMatches, 0, len(matches)) + // recycle these allocations as we loop over runes + // with such recycling, only 2 slices should be allocated per call + // instead of o(n). + newMatches := poolOfMatches.BorrowMatches() // check current initialism matches - for _, match := range matches { - if keepCompleteMatch := match.complete; keepCompleteMatch { - newMatches = append(newMatches, match) - continue - } + if matches != nil { // skip first iteration + for _, match := range *matches { + if keepCompleteMatch := match.complete; keepCompleteMatch { + *newMatches = append(*newMatches, match) + continue + } - // drop failed match - currentMatchRune := match.body[currentRunePosition-match.start] - if !s.initialismRuneEqual(currentMatchRune, currentRune) { - continue - } + // drop failed match + currentMatchRune := match.body[currentRunePosition-match.start] + if currentMatchRune != currentRune { + continue + } - // try to complete ongoing match - if currentRunePosition-match.start == len(match.body)-1 { - // we are close; the next step is to check the symbol ahead - // if it is a small letter, then it is not the end of match - // but beginning of the next word - - if currentRunePosition < len(nameRunes)-1 { - nextRune := nameRunes[currentRunePosition+1] - if newWord := unicode.IsLower(nextRune); newWord { - // oh ok, it was the start of a new word - continue + // try to complete ongoing match + if currentRunePosition-match.start == len(match.body)-1 { + // we are close; the next step is to check the symbol ahead + // if it is a small letter, then it is not the end of match + // but beginning of the next word + + if currentRunePosition < len(nameRunes)-1 { + nextRune := nameRunes[currentRunePosition+1] + if newWord := unicode.IsLower(nextRune); newWord { + // oh ok, it was the start of a new word + continue + } } + + match.complete = true + match.end = currentRunePosition } - match.complete = true - match.end = currentRunePosition + *newMatches = append(*newMatches, match) } - - newMatches = append(newMatches, match) } // check for new initialism matches - for _, initialism := range s.initialisms { - initialismRunes := []rune(initialism) - if s.initialismRuneEqual(initialismRunes[0], currentRune) { - newMatches = append(newMatches, &initialismMatch{ + for i := range s.initialisms { + initialismRunes := s.initialismsRunes[i] + if initialismRunes[0] == currentRune { + *newMatches = append(*newMatches, initialismMatch{ start: currentRunePosition, body: initialismRunes, complete: false, @@ -140,24 +285,28 @@ func (s *splitter) gatherInitialismMatches(nameRunes []rune) initialismMatches { } } + if matches != nil { + poolOfMatches.RedeemMatches(matches) + } matches = newMatches } + // up to the caller to redeem this last slice return matches } -func (s *splitter) mapMatchesToNameLexems(nameRunes []rune, matches initialismMatches) []nameLexem { - nameLexems := make([]nameLexem, 0) +func (s splitter) mapMatchesToNameLexems(nameRunes []rune, matches *initialismMatches) *[]nameLexem { + nameLexems := poolOfLexems.BorrowLexems() - var lastAcceptedMatch *initialismMatch - for _, match := range matches { + var lastAcceptedMatch initialismMatch + for _, match := range *matches { if !match.complete { continue } - if firstMatch := lastAcceptedMatch == nil; firstMatch { - nameLexems = append(nameLexems, s.breakCasualString(nameRunes[:match.start])...) - nameLexems = append(nameLexems, s.breakInitialism(string(match.body))) + if firstMatch := lastAcceptedMatch.isZero(); firstMatch { + s.appendBrokenDownCasualString(nameLexems, nameRunes[:match.start]) + *nameLexems = append(*nameLexems, s.breakInitialism(string(match.body))) lastAcceptedMatch = match @@ -169,63 +318,66 @@ func (s *splitter) mapMatchesToNameLexems(nameRunes []rune, matches initialismMa } middle := nameRunes[lastAcceptedMatch.end+1 : match.start] - nameLexems = append(nameLexems, s.breakCasualString(middle)...) - nameLexems = append(nameLexems, s.breakInitialism(string(match.body))) + s.appendBrokenDownCasualString(nameLexems, middle) + *nameLexems = append(*nameLexems, s.breakInitialism(string(match.body))) lastAcceptedMatch = match } // we have not found any accepted matches - if lastAcceptedMatch == nil { - return s.breakCasualString(nameRunes) - } - - if lastAcceptedMatch.end+1 != len(nameRunes) { + if lastAcceptedMatch.isZero() { + *nameLexems = (*nameLexems)[:0] + s.appendBrokenDownCasualString(nameLexems, nameRunes) + } else if lastAcceptedMatch.end+1 != len(nameRunes) { rest := nameRunes[lastAcceptedMatch.end+1:] - nameLexems = append(nameLexems, s.breakCasualString(rest)...) + s.appendBrokenDownCasualString(nameLexems, rest) } - return nameLexems -} + poolOfMatches.RedeemMatches(matches) -func (s *splitter) initialismRuneEqual(a, b rune) bool { - return a == b + return nameLexems } -func (s *splitter) breakInitialism(original string) nameLexem { +func (s splitter) breakInitialism(original string) nameLexem { return newInitialismNameLexem(original, original) } -func (s *splitter) breakCasualString(str []rune) []nameLexem { - segments := make([]nameLexem, 0) - currentSegment := "" +func (s splitter) appendBrokenDownCasualString(segments *[]nameLexem, str []rune) { + currentSegment := poolOfBuffers.BorrowBuffer(len(str)) // unlike strings.Builder, bytes.Buffer initial storage can reused + defer func() { + poolOfBuffers.RedeemBuffer(currentSegment) + }() addCasualNameLexem := func(original string) { - segments = append(segments, newCasualNameLexem(original)) + *segments = append(*segments, newCasualNameLexem(original)) } addInitialismNameLexem := func(original, match string) { - segments = append(segments, newInitialismNameLexem(original, match)) + *segments = append(*segments, newInitialismNameLexem(original, match)) } - addNameLexem := func(original string) { - if s.postSplitInitialismCheck { - for _, initialism := range s.initialisms { - if upper(initialism) == upper(original) { - addInitialismNameLexem(original, initialism) + var addNameLexem func(string) + if s.postSplitInitialismCheck { + addNameLexem = func(original string) { + for i := range s.initialisms { + if isEqualFoldIgnoreSpace(s.initialismsUpperCased[i], original) { + addInitialismNameLexem(original, s.initialisms[i]) + return } } - } - addCasualNameLexem(original) + addCasualNameLexem(original) + } + } else { + addNameLexem = addCasualNameLexem } - for _, rn := range string(str) { - if replace, found := nameReplaceTable[rn]; found { - if currentSegment != "" { - addNameLexem(currentSegment) - currentSegment = "" + for _, rn := range str { + if replace, found := nameReplaceTable(rn); found { + if currentSegment.Len() > 0 { + addNameLexem(currentSegment.String()) + currentSegment.Reset() } if replace != "" { @@ -236,27 +388,121 @@ func (s *splitter) breakCasualString(str []rune) []nameLexem { } if !unicode.In(rn, unicode.L, unicode.M, unicode.N, unicode.Pc) { - if currentSegment != "" { - addNameLexem(currentSegment) - currentSegment = "" + if currentSegment.Len() > 0 { + addNameLexem(currentSegment.String()) + currentSegment.Reset() } continue } if unicode.IsUpper(rn) { - if currentSegment != "" { - addNameLexem(currentSegment) + if currentSegment.Len() > 0 { + addNameLexem(currentSegment.String()) } - currentSegment = "" + currentSegment.Reset() } - currentSegment += string(rn) + currentSegment.WriteRune(rn) + } + + if currentSegment.Len() > 0 { + addNameLexem(currentSegment.String()) } +} + +// isEqualFoldIgnoreSpace is the same as strings.EqualFold, but +// it ignores leading and trailing blank spaces in the compared +// string. +// +// base is assumed to be composed of upper-cased runes, and be already +// trimmed. +// +// This code is heavily inspired from strings.EqualFold. +func isEqualFoldIgnoreSpace(base []rune, str string) bool { + var i, baseIndex int + // equivalent to b := []byte(str), but without data copy + b := hackStringBytes(str) + + for i < len(b) { + if c := b[i]; c < utf8.RuneSelf { + // fast path for ASCII + if c != ' ' && c != '\t' { + break + } + i++ + + continue + } + + // unicode case + r, size := utf8.DecodeRune(b[i:]) + if !unicode.IsSpace(r) { + break + } + i += size + } + + if i >= len(b) { + return len(base) == 0 + } + + for _, baseRune := range base { + if i >= len(b) { + break + } + + if c := b[i]; c < utf8.RuneSelf { + // single byte rune case (ASCII) + if baseRune >= utf8.RuneSelf { + return false + } + + baseChar := byte(baseRune) + if c != baseChar && + !('a' <= c && c <= 'z' && c-'a'+'A' == baseChar) { + return false + } + + baseIndex++ + i++ + + continue + } + + // unicode case + r, size := utf8.DecodeRune(b[i:]) + if unicode.ToUpper(r) != baseRune { + return false + } + baseIndex++ + i += size + } + + if baseIndex != len(base) { + return false + } + + // all passed: now we should only have blanks + for i < len(b) { + if c := b[i]; c < utf8.RuneSelf { + // fast path for ASCII + if c != ' ' && c != '\t' { + return false + } + i++ + + continue + } + + // unicode case + r, size := utf8.DecodeRune(b[i:]) + if !unicode.IsSpace(r) { + return false + } - if currentSegment != "" { - addNameLexem(currentSegment) + i += size } - return segments + return true } |