diff options
Diffstat (limited to 'vendor/github.com/rivo/uniseg/sentencerules.go')
| -rw-r--r-- | vendor/github.com/rivo/uniseg/sentencerules.go | 276 |
1 files changed, 0 insertions, 276 deletions
diff --git a/vendor/github.com/rivo/uniseg/sentencerules.go b/vendor/github.com/rivo/uniseg/sentencerules.go deleted file mode 100644 index 0b29c7bdb..000000000 --- a/vendor/github.com/rivo/uniseg/sentencerules.go +++ /dev/null @@ -1,276 +0,0 @@ -package uniseg - -import "unicode/utf8" - -// The states of the sentence break parser. -const ( - sbAny = iota - sbCR - sbParaSep - sbATerm - sbUpper - sbLower - sbSB7 - sbSB8Close - sbSB8Sp - sbSTerm - sbSB8aClose - sbSB8aSp -) - -// sbTransitions implements the sentence break parser's state transitions. It's -// anologous to [grTransitions], see comments there for details. -// -// Unicode version 15.0.0. -func sbTransitions(state, prop int) (newState int, sentenceBreak bool, rule int) { - switch uint64(state) | uint64(prop)<<32 { - // SB3. - case sbAny | prCR<<32: - return sbCR, false, 9990 - case sbCR | prLF<<32: - return sbParaSep, false, 30 - - // SB4. - case sbAny | prSep<<32: - return sbParaSep, false, 9990 - case sbAny | prLF<<32: - return sbParaSep, false, 9990 - case sbParaSep | prAny<<32: - return sbAny, true, 40 - case sbCR | prAny<<32: - return sbAny, true, 40 - - // SB6. - case sbAny | prATerm<<32: - return sbATerm, false, 9990 - case sbATerm | prNumeric<<32: - return sbAny, false, 60 - case sbSB7 | prNumeric<<32: - return sbAny, false, 60 // Because ATerm also appears in SB7. - - // SB7. - case sbAny | prUpper<<32: - return sbUpper, false, 9990 - case sbAny | prLower<<32: - return sbLower, false, 9990 - case sbUpper | prATerm<<32: - return sbSB7, false, 70 - case sbLower | prATerm<<32: - return sbSB7, false, 70 - case sbSB7 | prUpper<<32: - return sbUpper, false, 70 - - // SB8a. - case sbAny | prSTerm<<32: - return sbSTerm, false, 9990 - case sbATerm | prSContinue<<32: - return sbAny, false, 81 - case sbATerm | prATerm<<32: - return sbATerm, false, 81 - case sbATerm | prSTerm<<32: - return sbSTerm, false, 81 - case sbSB7 | prSContinue<<32: - return sbAny, false, 81 - case sbSB7 | prATerm<<32: - return sbATerm, false, 81 - case sbSB7 | prSTerm<<32: - return sbSTerm, false, 81 - case sbSB8Close | prSContinue<<32: - return sbAny, false, 81 - case sbSB8Close | prATerm<<32: - return sbATerm, false, 81 - case sbSB8Close | prSTerm<<32: - return sbSTerm, false, 81 - case sbSB8Sp | prSContinue<<32: - return sbAny, false, 81 - case sbSB8Sp | prATerm<<32: - return sbATerm, false, 81 - case sbSB8Sp | prSTerm<<32: - return sbSTerm, false, 81 - case sbSTerm | prSContinue<<32: - return sbAny, false, 81 - case sbSTerm | prATerm<<32: - return sbATerm, false, 81 - case sbSTerm | prSTerm<<32: - return sbSTerm, false, 81 - case sbSB8aClose | prSContinue<<32: - return sbAny, false, 81 - case sbSB8aClose | prATerm<<32: - return sbATerm, false, 81 - case sbSB8aClose | prSTerm<<32: - return sbSTerm, false, 81 - case sbSB8aSp | prSContinue<<32: - return sbAny, false, 81 - case sbSB8aSp | prATerm<<32: - return sbATerm, false, 81 - case sbSB8aSp | prSTerm<<32: - return sbSTerm, false, 81 - - // SB9. - case sbATerm | prClose<<32: - return sbSB8Close, false, 90 - case sbSB7 | prClose<<32: - return sbSB8Close, false, 90 - case sbSB8Close | prClose<<32: - return sbSB8Close, false, 90 - case sbATerm | prSp<<32: - return sbSB8Sp, false, 90 - case sbSB7 | prSp<<32: - return sbSB8Sp, false, 90 - case sbSB8Close | prSp<<32: - return sbSB8Sp, false, 90 - case sbSTerm | prClose<<32: - return sbSB8aClose, false, 90 - case sbSB8aClose | prClose<<32: - return sbSB8aClose, false, 90 - case sbSTerm | prSp<<32: - return sbSB8aSp, false, 90 - case sbSB8aClose | prSp<<32: - return sbSB8aSp, false, 90 - case sbATerm | prSep<<32: - return sbParaSep, false, 90 - case sbATerm | prCR<<32: - return sbParaSep, false, 90 - case sbATerm | prLF<<32: - return sbParaSep, false, 90 - case sbSB7 | prSep<<32: - return sbParaSep, false, 90 - case sbSB7 | prCR<<32: - return sbParaSep, false, 90 - case sbSB7 | prLF<<32: - return sbParaSep, false, 90 - case sbSB8Close | prSep<<32: - return sbParaSep, false, 90 - case sbSB8Close | prCR<<32: - return sbParaSep, false, 90 - case sbSB8Close | prLF<<32: - return sbParaSep, false, 90 - case sbSTerm | prSep<<32: - return sbParaSep, false, 90 - case sbSTerm | prCR<<32: - return sbParaSep, false, 90 - case sbSTerm | prLF<<32: - return sbParaSep, false, 90 - case sbSB8aClose | prSep<<32: - return sbParaSep, false, 90 - case sbSB8aClose | prCR<<32: - return sbParaSep, false, 90 - case sbSB8aClose | prLF<<32: - return sbParaSep, false, 90 - - // SB10. - case sbSB8Sp | prSp<<32: - return sbSB8Sp, false, 100 - case sbSB8aSp | prSp<<32: - return sbSB8aSp, false, 100 - case sbSB8Sp | prSep<<32: - return sbParaSep, false, 100 - case sbSB8Sp | prCR<<32: - return sbParaSep, false, 100 - case sbSB8Sp | prLF<<32: - return sbParaSep, false, 100 - - // SB11. - case sbATerm | prAny<<32: - return sbAny, true, 110 - case sbSB7 | prAny<<32: - return sbAny, true, 110 - case sbSB8Close | prAny<<32: - return sbAny, true, 110 - case sbSB8Sp | prAny<<32: - return sbAny, true, 110 - case sbSTerm | prAny<<32: - return sbAny, true, 110 - case sbSB8aClose | prAny<<32: - return sbAny, true, 110 - case sbSB8aSp | prAny<<32: - return sbAny, true, 110 - // We'll always break after ParaSep due to SB4. - - default: - return -1, false, -1 - } -} - -// transitionSentenceBreakState determines the new state of the sentence break -// parser given the current state and the next code point. It also returns -// whether a sentence boundary was detected. If more than one code point is -// needed to determine the new state, the byte slice or the string starting -// after rune "r" can be used (whichever is not nil or empty) for further -// lookups. -func transitionSentenceBreakState(state int, r rune, b []byte, str string) (newState int, sentenceBreak bool) { - // Determine the property of the next character. - nextProperty := property(sentenceBreakCodePoints, r) - - // SB5 (Replacing Ignore Rules). - if nextProperty == prExtend || nextProperty == prFormat { - if state == sbParaSep || state == sbCR { - return sbAny, true // Make sure we don't apply SB5 to SB3 or SB4. - } - if state < 0 { - return sbAny, true // SB1. - } - return state, false - } - - // Find the applicable transition in the table. - var rule int - newState, sentenceBreak, rule = sbTransitions(state, nextProperty) - if newState < 0 { - // No specific transition found. Try the less specific ones. - anyPropState, anyPropProp, anyPropRule := sbTransitions(state, prAny) - anyStateState, anyStateProp, anyStateRule := sbTransitions(sbAny, nextProperty) - if anyPropState >= 0 && anyStateState >= 0 { - // Both apply. We'll use a mix (see comments for grTransitions). - newState, sentenceBreak, rule = anyStateState, anyStateProp, anyStateRule - if anyPropRule < anyStateRule { - sentenceBreak, rule = anyPropProp, anyPropRule - } - } else if anyPropState >= 0 { - // We only have a specific state. - newState, sentenceBreak, rule = anyPropState, anyPropProp, anyPropRule - // This branch will probably never be reached because okAnyState will - // always be true given the current transition map. But we keep it here - // for future modifications to the transition map where this may not be - // true anymore. - } else if anyStateState >= 0 { - // We only have a specific property. - newState, sentenceBreak, rule = anyStateState, anyStateProp, anyStateRule - } else { - // No known transition. SB999: Any × Any. - newState, sentenceBreak, rule = sbAny, false, 9990 - } - } - - // SB8. - if rule > 80 && (state == sbATerm || state == sbSB8Close || state == sbSB8Sp || state == sbSB7) { - // Check the right side of the rule. - var length int - for nextProperty != prOLetter && - nextProperty != prUpper && - nextProperty != prLower && - nextProperty != prSep && - nextProperty != prCR && - nextProperty != prLF && - nextProperty != prATerm && - nextProperty != prSTerm { - // Move on to the next rune. - if b != nil { // Byte slice version. - r, length = utf8.DecodeRune(b) - b = b[length:] - } else { // String version. - r, length = utf8.DecodeRuneInString(str) - str = str[length:] - } - if r == utf8.RuneError { - break - } - nextProperty = property(sentenceBreakCodePoints, r) - } - if nextProperty == prLower { - return sbLower, false - } - } - - return -} |
