summaryrefslogtreecommitdiff
path: root/vendor/github.com/rivo/uniseg/graphemerules.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/rivo/uniseg/graphemerules.go')
-rw-r--r--vendor/github.com/rivo/uniseg/graphemerules.go176
1 files changed, 176 insertions, 0 deletions
diff --git a/vendor/github.com/rivo/uniseg/graphemerules.go b/vendor/github.com/rivo/uniseg/graphemerules.go
new file mode 100644
index 000000000..5d399d29c
--- /dev/null
+++ b/vendor/github.com/rivo/uniseg/graphemerules.go
@@ -0,0 +1,176 @@
+package uniseg
+
+// The states of the grapheme cluster parser, i.e. what it has seen so far.
+const (
+ grAny = iota // Default state; also stands for "any state" in grTransitions lookups.
+ grCR // The last code point was CR.
+ grControlLF // The last code point was LF or Control (including the LF of CR LF).
+ grL // Inside a Hangul sequence, after L.
+ grLVV // Inside a Hangul sequence, after LV or V.
+ grLVTT // Inside a Hangul sequence, after LVT or T.
+ grPrepend // The last code point was Prepend.
+ grExtendedPictographic // After ExtendedPictographic, optionally followed by Extend.
+ grExtendedPictographicZWJ // After ExtendedPictographic, optional Extend, then ZWJ.
+ grRIOdd // An odd number of consecutive regional indicators has been seen.
+ grRIEven // An even number of consecutive regional indicators has been seen.
+)
+
+// The grapheme cluster parser's breaking instructions.
+const (
+ grNoBoundary = iota // Do not break between the last and the next code point.
+ grBoundary // Break between the last and the next code point.
+)
+
+// grTransitions implements the grapheme cluster parser's state transitions.
+// It maps a state and a property to a new state, a breaking instruction (for
+// the boundary between the last and the next code point), and the number of the
+// rule behind them. All results are -1 if the exact pair has no transition.
+//
+// The caller (transitionGraphemeState) resolves a transition as follows:
+//
+//  1. Find specific state + specific property. Stop if found.
+//  2. Find specific state + any property.
+//  3. Find any state + specific property.
+//  4. If only (2) or (3) (but not both) was found, stop.
+//  5. If both (2) and (3) were found, use state from (3) and breaking instruction
+//     from the transition with the lower rule number, prefer (3) if rule numbers
+//     are equal. Stop.
+//  6. Assume grAny and grBoundary.
+//
+// Unicode version 15.0.0.
+func grTransitions(state, prop int) (newState int, boundary int, rule int) {
+	// A big switch statement turned out to be much faster than a map. The key
+	// packs the state into the low 32 bits and the property into the high 32.
+	// Entries with rule number 9990 break and merely enter a tracking state.
+	switch uint64(state) | uint64(prop)<<32 {
+	// GB5: break before CR, LF, and Control.
+	case grAny | prCR<<32:
+		return grCR, grBoundary, 50
+	case grAny | prLF<<32:
+		return grControlLF, grBoundary, 50
+	case grAny | prControl<<32:
+		return grControlLF, grBoundary, 50
+
+	// GB4: break after CR, LF, and Control.
+	case grCR | prAny<<32:
+		return grAny, grBoundary, 40
+	case grControlLF | prAny<<32:
+		return grAny, grBoundary, 40
+
+	// GB3: no break between CR and LF.
+	case grCR | prLF<<32:
+		return grControlLF, grNoBoundary, 30
+
+	// GB6: no break between L and a following L, V, LV, or LVT.
+	case grAny | prL<<32:
+		return grL, grBoundary, 9990
+	case grL | prL<<32:
+		return grL, grNoBoundary, 60
+	case grL | prV<<32:
+		return grLVV, grNoBoundary, 60
+	case grL | prLV<<32:
+		return grLVV, grNoBoundary, 60
+	case grL | prLVT<<32:
+		return grLVTT, grNoBoundary, 60
+
+	// GB7: no break between LV or V and a following V or T.
+	case grAny | prLV<<32:
+		return grLVV, grBoundary, 9990
+	case grAny | prV<<32:
+		return grLVV, grBoundary, 9990
+	case grLVV | prV<<32:
+		return grLVV, grNoBoundary, 70
+	case grLVV | prT<<32:
+		return grLVTT, grNoBoundary, 70
+
+	// GB8: no break between LVT or T and a following T.
+	case grAny | prLVT<<32:
+		return grLVTT, grBoundary, 9990
+	case grAny | prT<<32:
+		return grLVTT, grBoundary, 9990
+	case grLVTT | prT<<32:
+		return grLVTT, grNoBoundary, 80
+
+	// GB9: no break before Extend or ZWJ.
+	case grAny | prExtend<<32:
+		return grAny, grNoBoundary, 90
+	case grAny | prZWJ<<32:
+		return grAny, grNoBoundary, 90
+
+	// GB9a: no break before SpacingMark.
+	case grAny | prSpacingMark<<32:
+		return grAny, grNoBoundary, 91
+
+	// GB9b: no break after Prepend.
+	case grAny | prPrepend<<32:
+		return grPrepend, grBoundary, 9990
+	case grPrepend | prAny<<32:
+		return grAny, grNoBoundary, 92
+
+	// GB11: no break within ExtendedPictographic Extend* ZWJ ExtendedPictographic.
+	case grAny | prExtendedPictographic<<32:
+		return grExtendedPictographic, grBoundary, 9990
+	case grExtendedPictographic | prExtend<<32:
+		return grExtendedPictographic, grNoBoundary, 110
+	case grExtendedPictographic | prZWJ<<32:
+		return grExtendedPictographicZWJ, grNoBoundary, 110
+	case grExtendedPictographicZWJ | prExtendedPictographic<<32:
+		return grExtendedPictographic, grNoBoundary, 110
+
+	// GB12 / GB13: no break inside a regional indicator pair, break between pairs.
+	case grAny | prRegionalIndicator<<32:
+		return grRIOdd, grBoundary, 9990
+	case grRIOdd | prRegionalIndicator<<32:
+		return grRIEven, grNoBoundary, 120
+	case grRIEven | prRegionalIndicator<<32:
+		return grRIOdd, grBoundary, 120
+	default:
+		return -1, -1, -1
+	}
+}
+
+// transitionGraphemeState advances the grapheme cluster parser by one code
+// point. Given the current state and the next rune r, it returns the parser's
+// new state, r's grapheme property (as returned by propertyGraphemes), and
+// whether a cluster boundary lies between the previous code point and r.
+func transitionGraphemeState(state int, r rune) (newState, prop int, boundary bool) {
+	// Look up the grapheme property of the incoming code point.
+	prop = propertyGraphemes(r)
+
+	// A transition for this exact state and property, if there is one, wins
+	// outright.
+	if exact, instr, _ := grTransitions(state, prop); exact >= 0 {
+		return exact, prop, instr == grBoundary
+	}
+
+	// Otherwise fall back to the wildcard entries: this state with any
+	// property, and any state with this property.
+	byState, byStateInstr, byStateRule := grTransitions(state, prAny)
+	byProp, byPropInstr, byPropRule := grTransitions(grAny, prop)
+
+	switch {
+	case byState >= 0 && byProp >= 0:
+		// Both apply. The new state comes from the property entry, the
+		// breaking instruction from whichever entry has the lower rule
+		// number (the property entry on a tie).
+		instr := byPropInstr
+		if byStateRule < byPropRule {
+			instr = byStateInstr
+		}
+		return byProp, prop, instr == grBoundary
+
+	case byState >= 0:
+		// Only the state entry exists. With the current transition table
+		// this is probably unreachable; it is kept for future changes to
+		// the table.
+		return byState, prop, byStateInstr == grBoundary
+
+	case byProp >= 0:
+		// Only the property entry exists.
+		return byProp, prop, byPropInstr == grBoundary
+
+	default:
+		// No known transition. GB999: Any ÷ Any.
+		return grAny, prop, true
+	}
+}