summaryrefslogtreecommitdiff
path: root/vendor/github.com/rivo/uniseg/linerules.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/rivo/uniseg/linerules.go')
-rw-r--r--vendor/github.com/rivo/uniseg/linerules.go626
1 files changed, 626 insertions, 0 deletions
diff --git a/vendor/github.com/rivo/uniseg/linerules.go b/vendor/github.com/rivo/uniseg/linerules.go
new file mode 100644
index 000000000..7708ae0fb
--- /dev/null
+++ b/vendor/github.com/rivo/uniseg/linerules.go
@@ -0,0 +1,626 @@
+package uniseg
+
+import "unicode/utf8"
+
+// The states of the line break parser.
+const (
+ lbAny = iota
+ lbBK
+ lbCR
+ lbLF
+ lbNL
+ lbSP
+ lbZW
+ lbWJ
+ lbGL
+ lbBA
+ lbHY
+ lbCL
+ lbCP
+ lbEX
+ lbIS
+ lbSY
+ lbOP
+ lbQU
+ lbQUSP
+ lbNS
+ lbCLCPSP
+ lbB2
+ lbB2SP
+ lbCB
+ lbBB
+ lbLB21a
+ lbHL
+ lbAL
+ lbNU
+ lbPR
+ lbEB
+ lbIDEM
+ lbNUNU
+ lbNUSY
+ lbNUIS
+ lbNUCL
+ lbNUCP
+ lbPO
+ lbJL
+ lbJV
+ lbJT
+ lbH2
+ lbH3
+ lbOddRI
+ lbEvenRI
+ lbExtPicCn
+ lbZWJBit = 64
+ lbCPeaFWHBit = 128
+)
+
+// These constants define whether a given text may be broken into the next line.
+// If the break is optional (LineCanBreak), you may choose to break or not based
+// on your own criteria, for example, if the text has reached the available
+// width.
+const (
+ LineDontBreak = iota // You may not break the line here.
+ LineCanBreak // You may or may not break the line here.
+ LineMustBreak // You must break the line here.
+)
+
+// lbTransitions implements the line break parser's state transitions. It's
+// anologous to [grTransitions], see comments there for details.
+//
+// Unicode version 15.0.0.
+func lbTransitions(state, prop int) (newState, lineBreak, rule int) {
+ switch uint64(state) | uint64(prop)<<32 {
+ // LB4.
+ case lbBK | prAny<<32:
+ return lbAny, LineMustBreak, 40
+
+ // LB5.
+ case lbCR | prLF<<32:
+ return lbLF, LineDontBreak, 50
+ case lbCR | prAny<<32:
+ return lbAny, LineMustBreak, 50
+ case lbLF | prAny<<32:
+ return lbAny, LineMustBreak, 50
+ case lbNL | prAny<<32:
+ return lbAny, LineMustBreak, 50
+
+ // LB6.
+ case lbAny | prBK<<32:
+ return lbBK, LineDontBreak, 60
+ case lbAny | prCR<<32:
+ return lbCR, LineDontBreak, 60
+ case lbAny | prLF<<32:
+ return lbLF, LineDontBreak, 60
+ case lbAny | prNL<<32:
+ return lbNL, LineDontBreak, 60
+
+ // LB7.
+ case lbAny | prSP<<32:
+ return lbSP, LineDontBreak, 70
+ case lbAny | prZW<<32:
+ return lbZW, LineDontBreak, 70
+
+ // LB8.
+ case lbZW | prSP<<32:
+ return lbZW, LineDontBreak, 70
+ case lbZW | prAny<<32:
+ return lbAny, LineCanBreak, 80
+
+ // LB11.
+ case lbAny | prWJ<<32:
+ return lbWJ, LineDontBreak, 110
+ case lbWJ | prAny<<32:
+ return lbAny, LineDontBreak, 110
+
+ // LB12.
+ case lbAny | prGL<<32:
+ return lbGL, LineCanBreak, 310
+ case lbGL | prAny<<32:
+ return lbAny, LineDontBreak, 120
+
+ // LB13 (simple transitions).
+ case lbAny | prCL<<32:
+ return lbCL, LineCanBreak, 310
+ case lbAny | prCP<<32:
+ return lbCP, LineCanBreak, 310
+ case lbAny | prEX<<32:
+ return lbEX, LineDontBreak, 130
+ case lbAny | prIS<<32:
+ return lbIS, LineCanBreak, 310
+ case lbAny | prSY<<32:
+ return lbSY, LineCanBreak, 310
+
+ // LB14.
+ case lbAny | prOP<<32:
+ return lbOP, LineCanBreak, 310
+ case lbOP | prSP<<32:
+ return lbOP, LineDontBreak, 70
+ case lbOP | prAny<<32:
+ return lbAny, LineDontBreak, 140
+
+ // LB15.
+ case lbQU | prSP<<32:
+ return lbQUSP, LineDontBreak, 70
+ case lbQU | prOP<<32:
+ return lbOP, LineDontBreak, 150
+ case lbQUSP | prOP<<32:
+ return lbOP, LineDontBreak, 150
+
+ // LB16.
+ case lbCL | prSP<<32:
+ return lbCLCPSP, LineDontBreak, 70
+ case lbNUCL | prSP<<32:
+ return lbCLCPSP, LineDontBreak, 70
+ case lbCP | prSP<<32:
+ return lbCLCPSP, LineDontBreak, 70
+ case lbNUCP | prSP<<32:
+ return lbCLCPSP, LineDontBreak, 70
+ case lbCL | prNS<<32:
+ return lbNS, LineDontBreak, 160
+ case lbNUCL | prNS<<32:
+ return lbNS, LineDontBreak, 160
+ case lbCP | prNS<<32:
+ return lbNS, LineDontBreak, 160
+ case lbNUCP | prNS<<32:
+ return lbNS, LineDontBreak, 160
+ case lbCLCPSP | prNS<<32:
+ return lbNS, LineDontBreak, 160
+
+ // LB17.
+ case lbAny | prB2<<32:
+ return lbB2, LineCanBreak, 310
+ case lbB2 | prSP<<32:
+ return lbB2SP, LineDontBreak, 70
+ case lbB2 | prB2<<32:
+ return lbB2, LineDontBreak, 170
+ case lbB2SP | prB2<<32:
+ return lbB2, LineDontBreak, 170
+
+ // LB18.
+ case lbSP | prAny<<32:
+ return lbAny, LineCanBreak, 180
+ case lbQUSP | prAny<<32:
+ return lbAny, LineCanBreak, 180
+ case lbCLCPSP | prAny<<32:
+ return lbAny, LineCanBreak, 180
+ case lbB2SP | prAny<<32:
+ return lbAny, LineCanBreak, 180
+
+ // LB19.
+ case lbAny | prQU<<32:
+ return lbQU, LineDontBreak, 190
+ case lbQU | prAny<<32:
+ return lbAny, LineDontBreak, 190
+
+ // LB20.
+ case lbAny | prCB<<32:
+ return lbCB, LineCanBreak, 200
+ case lbCB | prAny<<32:
+ return lbAny, LineCanBreak, 200
+
+ // LB21.
+ case lbAny | prBA<<32:
+ return lbBA, LineDontBreak, 210
+ case lbAny | prHY<<32:
+ return lbHY, LineDontBreak, 210
+ case lbAny | prNS<<32:
+ return lbNS, LineDontBreak, 210
+ case lbAny | prBB<<32:
+ return lbBB, LineCanBreak, 310
+ case lbBB | prAny<<32:
+ return lbAny, LineDontBreak, 210
+
+ // LB21a.
+ case lbAny | prHL<<32:
+ return lbHL, LineCanBreak, 310
+ case lbHL | prHY<<32:
+ return lbLB21a, LineDontBreak, 210
+ case lbHL | prBA<<32:
+ return lbLB21a, LineDontBreak, 210
+ case lbLB21a | prAny<<32:
+ return lbAny, LineDontBreak, 211
+
+ // LB21b.
+ case lbSY | prHL<<32:
+ return lbHL, LineDontBreak, 212
+ case lbNUSY | prHL<<32:
+ return lbHL, LineDontBreak, 212
+
+ // LB22.
+ case lbAny | prIN<<32:
+ return lbAny, LineDontBreak, 220
+
+ // LB23.
+ case lbAny | prAL<<32:
+ return lbAL, LineCanBreak, 310
+ case lbAny | prNU<<32:
+ return lbNU, LineCanBreak, 310
+ case lbAL | prNU<<32:
+ return lbNU, LineDontBreak, 230
+ case lbHL | prNU<<32:
+ return lbNU, LineDontBreak, 230
+ case lbNU | prAL<<32:
+ return lbAL, LineDontBreak, 230
+ case lbNU | prHL<<32:
+ return lbHL, LineDontBreak, 230
+ case lbNUNU | prAL<<32:
+ return lbAL, LineDontBreak, 230
+ case lbNUNU | prHL<<32:
+ return lbHL, LineDontBreak, 230
+
+ // LB23a.
+ case lbAny | prPR<<32:
+ return lbPR, LineCanBreak, 310
+ case lbAny | prID<<32:
+ return lbIDEM, LineCanBreak, 310
+ case lbAny | prEB<<32:
+ return lbEB, LineCanBreak, 310
+ case lbAny | prEM<<32:
+ return lbIDEM, LineCanBreak, 310
+ case lbPR | prID<<32:
+ return lbIDEM, LineDontBreak, 231
+ case lbPR | prEB<<32:
+ return lbEB, LineDontBreak, 231
+ case lbPR | prEM<<32:
+ return lbIDEM, LineDontBreak, 231
+ case lbIDEM | prPO<<32:
+ return lbPO, LineDontBreak, 231
+ case lbEB | prPO<<32:
+ return lbPO, LineDontBreak, 231
+
+ // LB24.
+ case lbAny | prPO<<32:
+ return lbPO, LineCanBreak, 310
+ case lbPR | prAL<<32:
+ return lbAL, LineDontBreak, 240
+ case lbPR | prHL<<32:
+ return lbHL, LineDontBreak, 240
+ case lbPO | prAL<<32:
+ return lbAL, LineDontBreak, 240
+ case lbPO | prHL<<32:
+ return lbHL, LineDontBreak, 240
+ case lbAL | prPR<<32:
+ return lbPR, LineDontBreak, 240
+ case lbAL | prPO<<32:
+ return lbPO, LineDontBreak, 240
+ case lbHL | prPR<<32:
+ return lbPR, LineDontBreak, 240
+ case lbHL | prPO<<32:
+ return lbPO, LineDontBreak, 240
+
+ // LB25 (simple transitions).
+ case lbPR | prNU<<32:
+ return lbNU, LineDontBreak, 250
+ case lbPO | prNU<<32:
+ return lbNU, LineDontBreak, 250
+ case lbOP | prNU<<32:
+ return lbNU, LineDontBreak, 250
+ case lbHY | prNU<<32:
+ return lbNU, LineDontBreak, 250
+ case lbNU | prNU<<32:
+ return lbNUNU, LineDontBreak, 250
+ case lbNU | prSY<<32:
+ return lbNUSY, LineDontBreak, 250
+ case lbNU | prIS<<32:
+ return lbNUIS, LineDontBreak, 250
+ case lbNUNU | prNU<<32:
+ return lbNUNU, LineDontBreak, 250
+ case lbNUNU | prSY<<32:
+ return lbNUSY, LineDontBreak, 250
+ case lbNUNU | prIS<<32:
+ return lbNUIS, LineDontBreak, 250
+ case lbNUSY | prNU<<32:
+ return lbNUNU, LineDontBreak, 250
+ case lbNUSY | prSY<<32:
+ return lbNUSY, LineDontBreak, 250
+ case lbNUSY | prIS<<32:
+ return lbNUIS, LineDontBreak, 250
+ case lbNUIS | prNU<<32:
+ return lbNUNU, LineDontBreak, 250
+ case lbNUIS | prSY<<32:
+ return lbNUSY, LineDontBreak, 250
+ case lbNUIS | prIS<<32:
+ return lbNUIS, LineDontBreak, 250
+ case lbNU | prCL<<32:
+ return lbNUCL, LineDontBreak, 250
+ case lbNU | prCP<<32:
+ return lbNUCP, LineDontBreak, 250
+ case lbNUNU | prCL<<32:
+ return lbNUCL, LineDontBreak, 250
+ case lbNUNU | prCP<<32:
+ return lbNUCP, LineDontBreak, 250
+ case lbNUSY | prCL<<32:
+ return lbNUCL, LineDontBreak, 250
+ case lbNUSY | prCP<<32:
+ return lbNUCP, LineDontBreak, 250
+ case lbNUIS | prCL<<32:
+ return lbNUCL, LineDontBreak, 250
+ case lbNUIS | prCP<<32:
+ return lbNUCP, LineDontBreak, 250
+ case lbNU | prPO<<32:
+ return lbPO, LineDontBreak, 250
+ case lbNUNU | prPO<<32:
+ return lbPO, LineDontBreak, 250
+ case lbNUSY | prPO<<32:
+ return lbPO, LineDontBreak, 250
+ case lbNUIS | prPO<<32:
+ return lbPO, LineDontBreak, 250
+ case lbNUCL | prPO<<32:
+ return lbPO, LineDontBreak, 250
+ case lbNUCP | prPO<<32:
+ return lbPO, LineDontBreak, 250
+ case lbNU | prPR<<32:
+ return lbPR, LineDontBreak, 250
+ case lbNUNU | prPR<<32:
+ return lbPR, LineDontBreak, 250
+ case lbNUSY | prPR<<32:
+ return lbPR, LineDontBreak, 250
+ case lbNUIS | prPR<<32:
+ return lbPR, LineDontBreak, 250
+ case lbNUCL | prPR<<32:
+ return lbPR, LineDontBreak, 250
+ case lbNUCP | prPR<<32:
+ return lbPR, LineDontBreak, 250
+
+ // LB26.
+ case lbAny | prJL<<32:
+ return lbJL, LineCanBreak, 310
+ case lbAny | prJV<<32:
+ return lbJV, LineCanBreak, 310
+ case lbAny | prJT<<32:
+ return lbJT, LineCanBreak, 310
+ case lbAny | prH2<<32:
+ return lbH2, LineCanBreak, 310
+ case lbAny | prH3<<32:
+ return lbH3, LineCanBreak, 310
+ case lbJL | prJL<<32:
+ return lbJL, LineDontBreak, 260
+ case lbJL | prJV<<32:
+ return lbJV, LineDontBreak, 260
+ case lbJL | prH2<<32:
+ return lbH2, LineDontBreak, 260
+ case lbJL | prH3<<32:
+ return lbH3, LineDontBreak, 260
+ case lbJV | prJV<<32:
+ return lbJV, LineDontBreak, 260
+ case lbJV | prJT<<32:
+ return lbJT, LineDontBreak, 260
+ case lbH2 | prJV<<32:
+ return lbJV, LineDontBreak, 260
+ case lbH2 | prJT<<32:
+ return lbJT, LineDontBreak, 260
+ case lbJT | prJT<<32:
+ return lbJT, LineDontBreak, 260
+ case lbH3 | prJT<<32:
+ return lbJT, LineDontBreak, 260
+
+ // LB27.
+ case lbJL | prPO<<32:
+ return lbPO, LineDontBreak, 270
+ case lbJV | prPO<<32:
+ return lbPO, LineDontBreak, 270
+ case lbJT | prPO<<32:
+ return lbPO, LineDontBreak, 270
+ case lbH2 | prPO<<32:
+ return lbPO, LineDontBreak, 270
+ case lbH3 | prPO<<32:
+ return lbPO, LineDontBreak, 270
+ case lbPR | prJL<<32:
+ return lbJL, LineDontBreak, 270
+ case lbPR | prJV<<32:
+ return lbJV, LineDontBreak, 270
+ case lbPR | prJT<<32:
+ return lbJT, LineDontBreak, 270
+ case lbPR | prH2<<32:
+ return lbH2, LineDontBreak, 270
+ case lbPR | prH3<<32:
+ return lbH3, LineDontBreak, 270
+
+ // LB28.
+ case lbAL | prAL<<32:
+ return lbAL, LineDontBreak, 280
+ case lbAL | prHL<<32:
+ return lbHL, LineDontBreak, 280
+ case lbHL | prAL<<32:
+ return lbAL, LineDontBreak, 280
+ case lbHL | prHL<<32:
+ return lbHL, LineDontBreak, 280
+
+ // LB29.
+ case lbIS | prAL<<32:
+ return lbAL, LineDontBreak, 290
+ case lbIS | prHL<<32:
+ return lbHL, LineDontBreak, 290
+ case lbNUIS | prAL<<32:
+ return lbAL, LineDontBreak, 290
+ case lbNUIS | prHL<<32:
+ return lbHL, LineDontBreak, 290
+
+ default:
+ return -1, -1, -1
+ }
+}
+
+// transitionLineBreakState determines the new state of the line break parser
+// given the current state and the next code point. It also returns the type of
+// line break: LineDontBreak, LineCanBreak, or LineMustBreak. If more than one
+// code point is needed to determine the new state, the byte slice or the string
+// starting after rune "r" can be used (whichever is not nil or empty) for
+// further lookups.
+func transitionLineBreakState(state int, r rune, b []byte, str string) (newState int, lineBreak int) {
+ // Determine the property of the next character.
+ nextProperty, generalCategory := propertyLineBreak(r)
+
+ // Prepare.
+ var forceNoBreak, isCPeaFWH bool
+ if state >= 0 && state&lbCPeaFWHBit != 0 {
+ isCPeaFWH = true // LB30: CP but ea is not F, W, or H.
+ state = state &^ lbCPeaFWHBit
+ }
+ if state >= 0 && state&lbZWJBit != 0 {
+ state = state &^ lbZWJBit // Extract zero-width joiner bit.
+ forceNoBreak = true // LB8a.
+ }
+
+ defer func() {
+ // Transition into LB30.
+ if newState == lbCP || newState == lbNUCP {
+ ea := propertyEastAsianWidth(r)
+ if ea != prF && ea != prW && ea != prH {
+ newState |= lbCPeaFWHBit
+ }
+ }
+
+ // Override break.
+ if forceNoBreak {
+ lineBreak = LineDontBreak
+ }
+ }()
+
+ // LB1.
+ if nextProperty == prAI || nextProperty == prSG || nextProperty == prXX {
+ nextProperty = prAL
+ } else if nextProperty == prSA {
+ if generalCategory == gcMn || generalCategory == gcMc {
+ nextProperty = prCM
+ } else {
+ nextProperty = prAL
+ }
+ } else if nextProperty == prCJ {
+ nextProperty = prNS
+ }
+
+ // Combining marks.
+ if nextProperty == prZWJ || nextProperty == prCM {
+ var bit int
+ if nextProperty == prZWJ {
+ bit = lbZWJBit
+ }
+ mustBreakState := state < 0 || state == lbBK || state == lbCR || state == lbLF || state == lbNL
+ if !mustBreakState && state != lbSP && state != lbZW && state != lbQUSP && state != lbCLCPSP && state != lbB2SP {
+ // LB9.
+ return state | bit, LineDontBreak
+ } else {
+ // LB10.
+ if mustBreakState {
+ return lbAL | bit, LineMustBreak
+ }
+ return lbAL | bit, LineCanBreak
+ }
+ }
+
+ // Find the applicable transition in the table.
+ var rule int
+ newState, lineBreak, rule = lbTransitions(state, nextProperty)
+ if newState < 0 {
+ // No specific transition found. Try the less specific ones.
+ anyPropProp, anyPropLineBreak, anyPropRule := lbTransitions(state, prAny)
+ anyStateProp, anyStateLineBreak, anyStateRule := lbTransitions(lbAny, nextProperty)
+ if anyPropProp >= 0 && anyStateProp >= 0 {
+ // Both apply. We'll use a mix (see comments for grTransitions).
+ newState, lineBreak, rule = anyStateProp, anyStateLineBreak, anyStateRule
+ if anyPropRule < anyStateRule {
+ lineBreak, rule = anyPropLineBreak, anyPropRule
+ }
+ } else if anyPropProp >= 0 {
+ // We only have a specific state.
+ newState, lineBreak, rule = anyPropProp, anyPropLineBreak, anyPropRule
+ // This branch will probably never be reached because okAnyState will
+ // always be true given the current transition map. But we keep it here
+ // for future modifications to the transition map where this may not be
+ // true anymore.
+ } else if anyStateProp >= 0 {
+ // We only have a specific property.
+ newState, lineBreak, rule = anyStateProp, anyStateLineBreak, anyStateRule
+ } else {
+ // No known transition. LB31: ALL รท ALL.
+ newState, lineBreak, rule = lbAny, LineCanBreak, 310
+ }
+ }
+
+ // LB12a.
+ if rule > 121 &&
+ nextProperty == prGL &&
+ (state != lbSP && state != lbBA && state != lbHY && state != lbLB21a && state != lbQUSP && state != lbCLCPSP && state != lbB2SP) {
+ return lbGL, LineDontBreak
+ }
+
+ // LB13.
+ if rule > 130 && state != lbNU && state != lbNUNU {
+ switch nextProperty {
+ case prCL:
+ return lbCL, LineDontBreak
+ case prCP:
+ return lbCP, LineDontBreak
+ case prIS:
+ return lbIS, LineDontBreak
+ case prSY:
+ return lbSY, LineDontBreak
+ }
+ }
+
+ // LB25 (look ahead).
+ if rule > 250 &&
+ (state == lbPR || state == lbPO) &&
+ nextProperty == prOP || nextProperty == prHY {
+ var r rune
+ if b != nil { // Byte slice version.
+ r, _ = utf8.DecodeRune(b)
+ } else { // String version.
+ r, _ = utf8.DecodeRuneInString(str)
+ }
+ if r != utf8.RuneError {
+ pr, _ := propertyLineBreak(r)
+ if pr == prNU {
+ return lbNU, LineDontBreak
+ }
+ }
+ }
+
+ // LB30 (part one).
+ if rule > 300 {
+ if (state == lbAL || state == lbHL || state == lbNU || state == lbNUNU) && nextProperty == prOP {
+ ea := propertyEastAsianWidth(r)
+ if ea != prF && ea != prW && ea != prH {
+ return lbOP, LineDontBreak
+ }
+ } else if isCPeaFWH {
+ switch nextProperty {
+ case prAL:
+ return lbAL, LineDontBreak
+ case prHL:
+ return lbHL, LineDontBreak
+ case prNU:
+ return lbNU, LineDontBreak
+ }
+ }
+ }
+
+ // LB30a.
+ if newState == lbAny && nextProperty == prRI {
+ if state != lbOddRI && state != lbEvenRI { // Includes state == -1.
+ // Transition into the first RI.
+ return lbOddRI, lineBreak
+ }
+ if state == lbOddRI {
+ // Don't break pairs of Regional Indicators.
+ return lbEvenRI, LineDontBreak
+ }
+ return lbOddRI, lineBreak
+ }
+
+ // LB30b.
+ if rule > 302 {
+ if nextProperty == prEM {
+ if state == lbEB || state == lbExtPicCn {
+ return prAny, LineDontBreak
+ }
+ }
+ graphemeProperty := propertyGraphemes(r)
+ if graphemeProperty == prExtendedPictographic && generalCategory == gcCn {
+ return lbExtPicCn, LineCanBreak
+ }
+ }
+
+ return
+}