diff options
Diffstat (limited to 'vendor/github.com/rivo/uniseg/linerules.go')
-rw-r--r-- | vendor/github.com/rivo/uniseg/linerules.go | 626 |
1 files changed, 626 insertions, 0 deletions
diff --git a/vendor/github.com/rivo/uniseg/linerules.go b/vendor/github.com/rivo/uniseg/linerules.go new file mode 100644 index 000000000..7708ae0fb --- /dev/null +++ b/vendor/github.com/rivo/uniseg/linerules.go @@ -0,0 +1,626 @@ +package uniseg + +import "unicode/utf8" + +// The states of the line break parser. +const ( + lbAny = iota + lbBK + lbCR + lbLF + lbNL + lbSP + lbZW + lbWJ + lbGL + lbBA + lbHY + lbCL + lbCP + lbEX + lbIS + lbSY + lbOP + lbQU + lbQUSP + lbNS + lbCLCPSP + lbB2 + lbB2SP + lbCB + lbBB + lbLB21a + lbHL + lbAL + lbNU + lbPR + lbEB + lbIDEM + lbNUNU + lbNUSY + lbNUIS + lbNUCL + lbNUCP + lbPO + lbJL + lbJV + lbJT + lbH2 + lbH3 + lbOddRI + lbEvenRI + lbExtPicCn + lbZWJBit = 64 + lbCPeaFWHBit = 128 +) + +// These constants define whether a given text may be broken into the next line. +// If the break is optional (LineCanBreak), you may choose to break or not based +// on your own criteria, for example, if the text has reached the available +// width. +const ( + LineDontBreak = iota // You may not break the line here. + LineCanBreak // You may or may not break the line here. + LineMustBreak // You must break the line here. +) + +// lbTransitions implements the line break parser's state transitions. It's +// anologous to [grTransitions], see comments there for details. +// +// Unicode version 15.0.0. +func lbTransitions(state, prop int) (newState, lineBreak, rule int) { + switch uint64(state) | uint64(prop)<<32 { + // LB4. + case lbBK | prAny<<32: + return lbAny, LineMustBreak, 40 + + // LB5. + case lbCR | prLF<<32: + return lbLF, LineDontBreak, 50 + case lbCR | prAny<<32: + return lbAny, LineMustBreak, 50 + case lbLF | prAny<<32: + return lbAny, LineMustBreak, 50 + case lbNL | prAny<<32: + return lbAny, LineMustBreak, 50 + + // LB6. + case lbAny | prBK<<32: + return lbBK, LineDontBreak, 60 + case lbAny | prCR<<32: + return lbCR, LineDontBreak, 60 + case lbAny | prLF<<32: + return lbLF, LineDontBreak, 60 + case lbAny | prNL<<32: + return lbNL, LineDontBreak, 60 + + // LB7. + case lbAny | prSP<<32: + return lbSP, LineDontBreak, 70 + case lbAny | prZW<<32: + return lbZW, LineDontBreak, 70 + + // LB8. + case lbZW | prSP<<32: + return lbZW, LineDontBreak, 70 + case lbZW | prAny<<32: + return lbAny, LineCanBreak, 80 + + // LB11. + case lbAny | prWJ<<32: + return lbWJ, LineDontBreak, 110 + case lbWJ | prAny<<32: + return lbAny, LineDontBreak, 110 + + // LB12. + case lbAny | prGL<<32: + return lbGL, LineCanBreak, 310 + case lbGL | prAny<<32: + return lbAny, LineDontBreak, 120 + + // LB13 (simple transitions). + case lbAny | prCL<<32: + return lbCL, LineCanBreak, 310 + case lbAny | prCP<<32: + return lbCP, LineCanBreak, 310 + case lbAny | prEX<<32: + return lbEX, LineDontBreak, 130 + case lbAny | prIS<<32: + return lbIS, LineCanBreak, 310 + case lbAny | prSY<<32: + return lbSY, LineCanBreak, 310 + + // LB14. + case lbAny | prOP<<32: + return lbOP, LineCanBreak, 310 + case lbOP | prSP<<32: + return lbOP, LineDontBreak, 70 + case lbOP | prAny<<32: + return lbAny, LineDontBreak, 140 + + // LB15. + case lbQU | prSP<<32: + return lbQUSP, LineDontBreak, 70 + case lbQU | prOP<<32: + return lbOP, LineDontBreak, 150 + case lbQUSP | prOP<<32: + return lbOP, LineDontBreak, 150 + + // LB16. + case lbCL | prSP<<32: + return lbCLCPSP, LineDontBreak, 70 + case lbNUCL | prSP<<32: + return lbCLCPSP, LineDontBreak, 70 + case lbCP | prSP<<32: + return lbCLCPSP, LineDontBreak, 70 + case lbNUCP | prSP<<32: + return lbCLCPSP, LineDontBreak, 70 + case lbCL | prNS<<32: + return lbNS, LineDontBreak, 160 + case lbNUCL | prNS<<32: + return lbNS, LineDontBreak, 160 + case lbCP | prNS<<32: + return lbNS, LineDontBreak, 160 + case lbNUCP | prNS<<32: + return lbNS, LineDontBreak, 160 + case lbCLCPSP | prNS<<32: + return lbNS, LineDontBreak, 160 + + // LB17. + case lbAny | prB2<<32: + return lbB2, LineCanBreak, 310 + case lbB2 | prSP<<32: + return lbB2SP, LineDontBreak, 70 + case lbB2 | prB2<<32: + return lbB2, LineDontBreak, 170 + case lbB2SP | prB2<<32: + return lbB2, LineDontBreak, 170 + + // LB18. + case lbSP | prAny<<32: + return lbAny, LineCanBreak, 180 + case lbQUSP | prAny<<32: + return lbAny, LineCanBreak, 180 + case lbCLCPSP | prAny<<32: + return lbAny, LineCanBreak, 180 + case lbB2SP | prAny<<32: + return lbAny, LineCanBreak, 180 + + // LB19. + case lbAny | prQU<<32: + return lbQU, LineDontBreak, 190 + case lbQU | prAny<<32: + return lbAny, LineDontBreak, 190 + + // LB20. + case lbAny | prCB<<32: + return lbCB, LineCanBreak, 200 + case lbCB | prAny<<32: + return lbAny, LineCanBreak, 200 + + // LB21. + case lbAny | prBA<<32: + return lbBA, LineDontBreak, 210 + case lbAny | prHY<<32: + return lbHY, LineDontBreak, 210 + case lbAny | prNS<<32: + return lbNS, LineDontBreak, 210 + case lbAny | prBB<<32: + return lbBB, LineCanBreak, 310 + case lbBB | prAny<<32: + return lbAny, LineDontBreak, 210 + + // LB21a. + case lbAny | prHL<<32: + return lbHL, LineCanBreak, 310 + case lbHL | prHY<<32: + return lbLB21a, LineDontBreak, 210 + case lbHL | prBA<<32: + return lbLB21a, LineDontBreak, 210 + case lbLB21a | prAny<<32: + return lbAny, LineDontBreak, 211 + + // LB21b. + case lbSY | prHL<<32: + return lbHL, LineDontBreak, 212 + case lbNUSY | prHL<<32: + return lbHL, LineDontBreak, 212 + + // LB22. + case lbAny | prIN<<32: + return lbAny, LineDontBreak, 220 + + // LB23. + case lbAny | prAL<<32: + return lbAL, LineCanBreak, 310 + case lbAny | prNU<<32: + return lbNU, LineCanBreak, 310 + case lbAL | prNU<<32: + return lbNU, LineDontBreak, 230 + case lbHL | prNU<<32: + return lbNU, LineDontBreak, 230 + case lbNU | prAL<<32: + return lbAL, LineDontBreak, 230 + case lbNU | prHL<<32: + return lbHL, LineDontBreak, 230 + case lbNUNU | prAL<<32: + return lbAL, LineDontBreak, 230 + case lbNUNU | prHL<<32: + return lbHL, LineDontBreak, 230 + + // LB23a. + case lbAny | prPR<<32: + return lbPR, LineCanBreak, 310 + case lbAny | prID<<32: + return lbIDEM, LineCanBreak, 310 + case lbAny | prEB<<32: + return lbEB, LineCanBreak, 310 + case lbAny | prEM<<32: + return lbIDEM, LineCanBreak, 310 + case lbPR | prID<<32: + return lbIDEM, LineDontBreak, 231 + case lbPR | prEB<<32: + return lbEB, LineDontBreak, 231 + case lbPR | prEM<<32: + return lbIDEM, LineDontBreak, 231 + case lbIDEM | prPO<<32: + return lbPO, LineDontBreak, 231 + case lbEB | prPO<<32: + return lbPO, LineDontBreak, 231 + + // LB24. + case lbAny | prPO<<32: + return lbPO, LineCanBreak, 310 + case lbPR | prAL<<32: + return lbAL, LineDontBreak, 240 + case lbPR | prHL<<32: + return lbHL, LineDontBreak, 240 + case lbPO | prAL<<32: + return lbAL, LineDontBreak, 240 + case lbPO | prHL<<32: + return lbHL, LineDontBreak, 240 + case lbAL | prPR<<32: + return lbPR, LineDontBreak, 240 + case lbAL | prPO<<32: + return lbPO, LineDontBreak, 240 + case lbHL | prPR<<32: + return lbPR, LineDontBreak, 240 + case lbHL | prPO<<32: + return lbPO, LineDontBreak, 240 + + // LB25 (simple transitions). + case lbPR | prNU<<32: + return lbNU, LineDontBreak, 250 + case lbPO | prNU<<32: + return lbNU, LineDontBreak, 250 + case lbOP | prNU<<32: + return lbNU, LineDontBreak, 250 + case lbHY | prNU<<32: + return lbNU, LineDontBreak, 250 + case lbNU | prNU<<32: + return lbNUNU, LineDontBreak, 250 + case lbNU | prSY<<32: + return lbNUSY, LineDontBreak, 250 + case lbNU | prIS<<32: + return lbNUIS, LineDontBreak, 250 + case lbNUNU | prNU<<32: + return lbNUNU, LineDontBreak, 250 + case lbNUNU | prSY<<32: + return lbNUSY, LineDontBreak, 250 + case lbNUNU | prIS<<32: + return lbNUIS, LineDontBreak, 250 + case lbNUSY | prNU<<32: + return lbNUNU, LineDontBreak, 250 + case lbNUSY | prSY<<32: + return lbNUSY, LineDontBreak, 250 + case lbNUSY | prIS<<32: + return lbNUIS, LineDontBreak, 250 + case lbNUIS | prNU<<32: + return lbNUNU, LineDontBreak, 250 + case lbNUIS | prSY<<32: + return lbNUSY, LineDontBreak, 250 + case lbNUIS | prIS<<32: + return lbNUIS, LineDontBreak, 250 + case lbNU | prCL<<32: + return lbNUCL, LineDontBreak, 250 + case lbNU | prCP<<32: + return lbNUCP, LineDontBreak, 250 + case lbNUNU | prCL<<32: + return lbNUCL, LineDontBreak, 250 + case lbNUNU | prCP<<32: + return lbNUCP, LineDontBreak, 250 + case lbNUSY | prCL<<32: + return lbNUCL, LineDontBreak, 250 + case lbNUSY | prCP<<32: + return lbNUCP, LineDontBreak, 250 + case lbNUIS | prCL<<32: + return lbNUCL, LineDontBreak, 250 + case lbNUIS | prCP<<32: + return lbNUCP, LineDontBreak, 250 + case lbNU | prPO<<32: + return lbPO, LineDontBreak, 250 + case lbNUNU | prPO<<32: + return lbPO, LineDontBreak, 250 + case lbNUSY | prPO<<32: + return lbPO, LineDontBreak, 250 + case lbNUIS | prPO<<32: + return lbPO, LineDontBreak, 250 + case lbNUCL | prPO<<32: + return lbPO, LineDontBreak, 250 + case lbNUCP | prPO<<32: + return lbPO, LineDontBreak, 250 + case lbNU | prPR<<32: + return lbPR, LineDontBreak, 250 + case lbNUNU | prPR<<32: + return lbPR, LineDontBreak, 250 + case lbNUSY | prPR<<32: + return lbPR, LineDontBreak, 250 + case lbNUIS | prPR<<32: + return lbPR, LineDontBreak, 250 + case lbNUCL | prPR<<32: + return lbPR, LineDontBreak, 250 + case lbNUCP | prPR<<32: + return lbPR, LineDontBreak, 250 + + // LB26. + case lbAny | prJL<<32: + return lbJL, LineCanBreak, 310 + case lbAny | prJV<<32: + return lbJV, LineCanBreak, 310 + case lbAny | prJT<<32: + return lbJT, LineCanBreak, 310 + case lbAny | prH2<<32: + return lbH2, LineCanBreak, 310 + case lbAny | prH3<<32: + return lbH3, LineCanBreak, 310 + case lbJL | prJL<<32: + return lbJL, LineDontBreak, 260 + case lbJL | prJV<<32: + return lbJV, LineDontBreak, 260 + case lbJL | prH2<<32: + return lbH2, LineDontBreak, 260 + case lbJL | prH3<<32: + return lbH3, LineDontBreak, 260 + case lbJV | prJV<<32: + return lbJV, LineDontBreak, 260 + case lbJV | prJT<<32: + return lbJT, LineDontBreak, 260 + case lbH2 | prJV<<32: + return lbJV, LineDontBreak, 260 + case lbH2 | prJT<<32: + return lbJT, LineDontBreak, 260 + case lbJT | prJT<<32: + return lbJT, LineDontBreak, 260 + case lbH3 | prJT<<32: + return lbJT, LineDontBreak, 260 + + // LB27. + case lbJL | prPO<<32: + return lbPO, LineDontBreak, 270 + case lbJV | prPO<<32: + return lbPO, LineDontBreak, 270 + case lbJT | prPO<<32: + return lbPO, LineDontBreak, 270 + case lbH2 | prPO<<32: + return lbPO, LineDontBreak, 270 + case lbH3 | prPO<<32: + return lbPO, LineDontBreak, 270 + case lbPR | prJL<<32: + return lbJL, LineDontBreak, 270 + case lbPR | prJV<<32: + return lbJV, LineDontBreak, 270 + case lbPR | prJT<<32: + return lbJT, LineDontBreak, 270 + case lbPR | prH2<<32: + return lbH2, LineDontBreak, 270 + case lbPR | prH3<<32: + return lbH3, LineDontBreak, 270 + + // LB28. + case lbAL | prAL<<32: + return lbAL, LineDontBreak, 280 + case lbAL | prHL<<32: + return lbHL, LineDontBreak, 280 + case lbHL | prAL<<32: + return lbAL, LineDontBreak, 280 + case lbHL | prHL<<32: + return lbHL, LineDontBreak, 280 + + // LB29. + case lbIS | prAL<<32: + return lbAL, LineDontBreak, 290 + case lbIS | prHL<<32: + return lbHL, LineDontBreak, 290 + case lbNUIS | prAL<<32: + return lbAL, LineDontBreak, 290 + case lbNUIS | prHL<<32: + return lbHL, LineDontBreak, 290 + + default: + return -1, -1, -1 + } +} + +// transitionLineBreakState determines the new state of the line break parser +// given the current state and the next code point. It also returns the type of +// line break: LineDontBreak, LineCanBreak, or LineMustBreak. If more than one +// code point is needed to determine the new state, the byte slice or the string +// starting after rune "r" can be used (whichever is not nil or empty) for +// further lookups. +func transitionLineBreakState(state int, r rune, b []byte, str string) (newState int, lineBreak int) { + // Determine the property of the next character. + nextProperty, generalCategory := propertyLineBreak(r) + + // Prepare. + var forceNoBreak, isCPeaFWH bool + if state >= 0 && state&lbCPeaFWHBit != 0 { + isCPeaFWH = true // LB30: CP but ea is not F, W, or H. + state = state &^ lbCPeaFWHBit + } + if state >= 0 && state&lbZWJBit != 0 { + state = state &^ lbZWJBit // Extract zero-width joiner bit. + forceNoBreak = true // LB8a. + } + + defer func() { + // Transition into LB30. + if newState == lbCP || newState == lbNUCP { + ea := propertyEastAsianWidth(r) + if ea != prF && ea != prW && ea != prH { + newState |= lbCPeaFWHBit + } + } + + // Override break. + if forceNoBreak { + lineBreak = LineDontBreak + } + }() + + // LB1. + if nextProperty == prAI || nextProperty == prSG || nextProperty == prXX { + nextProperty = prAL + } else if nextProperty == prSA { + if generalCategory == gcMn || generalCategory == gcMc { + nextProperty = prCM + } else { + nextProperty = prAL + } + } else if nextProperty == prCJ { + nextProperty = prNS + } + + // Combining marks. + if nextProperty == prZWJ || nextProperty == prCM { + var bit int + if nextProperty == prZWJ { + bit = lbZWJBit + } + mustBreakState := state < 0 || state == lbBK || state == lbCR || state == lbLF || state == lbNL + if !mustBreakState && state != lbSP && state != lbZW && state != lbQUSP && state != lbCLCPSP && state != lbB2SP { + // LB9. + return state | bit, LineDontBreak + } else { + // LB10. + if mustBreakState { + return lbAL | bit, LineMustBreak + } + return lbAL | bit, LineCanBreak + } + } + + // Find the applicable transition in the table. + var rule int + newState, lineBreak, rule = lbTransitions(state, nextProperty) + if newState < 0 { + // No specific transition found. Try the less specific ones. + anyPropProp, anyPropLineBreak, anyPropRule := lbTransitions(state, prAny) + anyStateProp, anyStateLineBreak, anyStateRule := lbTransitions(lbAny, nextProperty) + if anyPropProp >= 0 && anyStateProp >= 0 { + // Both apply. We'll use a mix (see comments for grTransitions). + newState, lineBreak, rule = anyStateProp, anyStateLineBreak, anyStateRule + if anyPropRule < anyStateRule { + lineBreak, rule = anyPropLineBreak, anyPropRule + } + } else if anyPropProp >= 0 { + // We only have a specific state. + newState, lineBreak, rule = anyPropProp, anyPropLineBreak, anyPropRule + // This branch will probably never be reached because okAnyState will + // always be true given the current transition map. But we keep it here + // for future modifications to the transition map where this may not be + // true anymore. + } else if anyStateProp >= 0 { + // We only have a specific property. + newState, lineBreak, rule = anyStateProp, anyStateLineBreak, anyStateRule + } else { + // No known transition. LB31: ALL รท ALL. + newState, lineBreak, rule = lbAny, LineCanBreak, 310 + } + } + + // LB12a. + if rule > 121 && + nextProperty == prGL && + (state != lbSP && state != lbBA && state != lbHY && state != lbLB21a && state != lbQUSP && state != lbCLCPSP && state != lbB2SP) { + return lbGL, LineDontBreak + } + + // LB13. + if rule > 130 && state != lbNU && state != lbNUNU { + switch nextProperty { + case prCL: + return lbCL, LineDontBreak + case prCP: + return lbCP, LineDontBreak + case prIS: + return lbIS, LineDontBreak + case prSY: + return lbSY, LineDontBreak + } + } + + // LB25 (look ahead). + if rule > 250 && + (state == lbPR || state == lbPO) && + nextProperty == prOP || nextProperty == prHY { + var r rune + if b != nil { // Byte slice version. + r, _ = utf8.DecodeRune(b) + } else { // String version. + r, _ = utf8.DecodeRuneInString(str) + } + if r != utf8.RuneError { + pr, _ := propertyLineBreak(r) + if pr == prNU { + return lbNU, LineDontBreak + } + } + } + + // LB30 (part one). + if rule > 300 { + if (state == lbAL || state == lbHL || state == lbNU || state == lbNUNU) && nextProperty == prOP { + ea := propertyEastAsianWidth(r) + if ea != prF && ea != prW && ea != prH { + return lbOP, LineDontBreak + } + } else if isCPeaFWH { + switch nextProperty { + case prAL: + return lbAL, LineDontBreak + case prHL: + return lbHL, LineDontBreak + case prNU: + return lbNU, LineDontBreak + } + } + } + + // LB30a. + if newState == lbAny && nextProperty == prRI { + if state != lbOddRI && state != lbEvenRI { // Includes state == -1. + // Transition into the first RI. + return lbOddRI, lineBreak + } + if state == lbOddRI { + // Don't break pairs of Regional Indicators. + return lbEvenRI, LineDontBreak + } + return lbOddRI, lineBreak + } + + // LB30b. + if rule > 302 { + if nextProperty == prEM { + if state == lbEB || state == lbExtPicCn { + return prAny, LineDontBreak + } + } + graphemeProperty := propertyGraphemes(r) + if graphemeProperty == prExtendedPictographic && generalCategory == gcCn { + return lbExtPicCn, LineCanBreak + } + } + + return +} |