diff options
Diffstat (limited to 'vendor/github.com/rivo/uniseg/properties.go')
-rw-r--r-- | vendor/github.com/rivo/uniseg/properties.go | 208 |
1 files changed, 208 insertions, 0 deletions
diff --git a/vendor/github.com/rivo/uniseg/properties.go b/vendor/github.com/rivo/uniseg/properties.go new file mode 100644 index 000000000..6290e6810 --- /dev/null +++ b/vendor/github.com/rivo/uniseg/properties.go @@ -0,0 +1,208 @@ +package uniseg + +// The Unicode properties as used in the various parsers. Only the ones needed +// in the context of this package are included. The values are consecutive integers assigned via iota, so the order of the names below determines each value. +const ( + prXX = 0 // The zero value. Originally documented as "Same as prAny"; NOTE(review): prXX is 0 while prAny evaluates to 1 (see prAny), so the two are not equal. + prAny = iota // Originally documented as "prAny must be 0"; NOTE(review): iota is 1 here because this is the second spec of the const block, so prAny is 1 - confirm which value is intended before relying on prAny == 0. + prPrepend // Grapheme properties must come first, to reduce the number of bits stored in the state vector. + prCR + prLF + prControl + prExtend + prRegionalIndicator + prSpacingMark + prL + prV + prT + prLV + prLVT + prZWJ + prExtendedPictographic + prNewline + prWSegSpace + prDoubleQuote + prSingleQuote + prMidNumLet + prNumeric + prMidLetter + prMidNum + prExtendNumLet + prALetter + prFormat + prHebrewLetter + prKatakana + prSp + prSTerm + prClose + prSContinue + prATerm + prUpper + prLower + prSep + prOLetter + prCM + prBA + prBK + prSP + prEX + prQU + prAL + prPR + prPO + prOP + prCP + prIS + prHY + prSY + prNU + prCL + prNL + prGL + prAI + prBB + prHL + prSA + prJL + prJV + prJT + prNS + prZW + prB2 + prIN + prWJ + prID + prEB + prCJ + prH2 + prH3 + prSG + prCB + prRI + prEM + prN + prNa + prA + prW + prH + prF + prEmojiPresentation +) + +// Unicode General Categories. Only the ones needed in the context of this +// package are included. +const ( + gcNone = iota // gcNone must be 0 (it is: iota is 0 on the first spec of a const block). + gcCc + gcZs + gcPo + gcSc + gcPs + gcPe + gcSm + gcPd + gcNd + gcLu + gcSk + gcPc + gcLl + gcSo + gcLo + gcPi + gcCf + gcNo + gcPf + gcLC + gcLm + gcMn + gcMe + gcMc + gcNl + gcZl + gcZp + gcCn + gcCs + gcCo +) + +// Special code points. 
+const ( + vs15 = 0xfe0e // Variation Selector-15 (text presentation) + vs16 = 0xfe0f // Variation Selector-16 (emoji presentation) +) + +// propertySearch performs a binary search on a property slice and returns the +// entry whose range (start = first array element, end = second array element) +// includes r, or an array of 0's if no such entry was found. +func propertySearch[E interface{ [3]int | [4]int }](dictionary []E, r rune) (result E) { + // Run a binary search. + from := 0 + to := len(dictionary) + for to > from { + middle := (from + to) / 2 + cpRange := dictionary[middle] + if int(r) < cpRange[0] { + to = middle + continue + } + if int(r) > cpRange[1] { + from = middle + 1 + continue + } + return cpRange + } + return +} + +// property returns the Unicode property value (see constants above) of the +// given code point. +func property(dictionary [][3]int, r rune) int { + return propertySearch(dictionary, r)[2] +} + +// propertyLineBreak returns the Unicode property value and General Category +// (see constants above) of the given code point, as listed in the line break +// code points table, while fast tracking ASCII digits and letters. +func propertyLineBreak(r rune) (property, generalCategory int) { + if r >= 'a' && r <= 'z' { + return prAL, gcLl + } + if r >= 'A' && r <= 'Z' { + return prAL, gcLu + } + if r >= '0' && r <= '9' { + return prNU, gcNd + } + entry := propertySearch(lineBreakCodePoints, r) + return entry[2], entry[3] +} + +// propertyGraphemes returns the Unicode grapheme cluster property value of the +// given code point while fast tracking ASCII characters. 
+func propertyGraphemes(r rune) int { + if r >= 0x20 && r <= 0x7e { + return prAny + } + if r == 0x0a { + return prLF + } + if r == 0x0d { + return prCR + } + if r >= 0 && r <= 0x1f || r == 0x7f { + return prControl + } + return property(graphemeCodePoints, r) +} + +// propertyEastAsianWidth returns the Unicode East Asian Width property value of +// the given code point while fast tracking ASCII characters. +func propertyEastAsianWidth(r rune) int { + if r >= 0x20 && r <= 0x7e { + return prNa + } + if r >= 0 && r <= 0x1f || r == 0x7f { + return prN + } + return property(eastAsianWidth, r) +} |