diff options
Diffstat (limited to 'vendor/github.com/rivo/uniseg/grapheme.go')
-rw-r--r-- | vendor/github.com/rivo/uniseg/grapheme.go | 331 |
1 files changed, 0 insertions, 331 deletions
diff --git a/vendor/github.com/rivo/uniseg/grapheme.go b/vendor/github.com/rivo/uniseg/grapheme.go deleted file mode 100644 index b12403d43..000000000 --- a/vendor/github.com/rivo/uniseg/grapheme.go +++ /dev/null @@ -1,331 +0,0 @@ -package uniseg - -import "unicode/utf8" - -// Graphemes implements an iterator over Unicode grapheme clusters, or -// user-perceived characters. While iterating, it also provides information -// about word boundaries, sentence boundaries, line breaks, and monospace -// character widths. -// -// After constructing the class via [NewGraphemes] for a given string "str", -// [Graphemes.Next] is called for every grapheme cluster in a loop until it -// returns false. Inside the loop, information about the grapheme cluster as -// well as boundary information and character width is available via the various -// methods (see examples below). -// -// This class basically wraps the [StepString] parser and provides a convenient -// interface to it. If you are only interested in some parts of this package's -// functionality, using the specialized functions starting with "First" is -// almost always faster. -type Graphemes struct { - // The original string. - original string - - // The remaining string to be parsed. - remaining string - - // The current grapheme cluster. - cluster string - - // The byte offset of the current grapheme cluster relative to the original - // string. - offset int - - // The current boundary information of the [Step] parser. - boundaries int - - // The current state of the [Step] parser. - state int -} - -// NewGraphemes returns a new grapheme cluster iterator. -func NewGraphemes(str string) *Graphemes { - return &Graphemes{ - original: str, - remaining: str, - state: -1, - } -} - -// Next advances the iterator by one grapheme cluster and returns false if no -// clusters are left. This function must be called before the first cluster is -// accessed. -func (g *Graphemes) Next() bool { - if len(g.remaining) == 0 { - // We're already past the end. - g.state = -2 - g.cluster = "" - return false - } - g.offset += len(g.cluster) - g.cluster, g.remaining, g.boundaries, g.state = StepString(g.remaining, g.state) - return true -} - -// Runes returns a slice of runes (code points) which corresponds to the current -// grapheme cluster. If the iterator is already past the end or [Graphemes.Next] -// has not yet been called, nil is returned. -func (g *Graphemes) Runes() []rune { - if g.state < 0 { - return nil - } - return []rune(g.cluster) -} - -// Str returns a substring of the original string which corresponds to the -// current grapheme cluster. If the iterator is already past the end or -// [Graphemes.Next] has not yet been called, an empty string is returned. -func (g *Graphemes) Str() string { - return g.cluster -} - -// Bytes returns a byte slice which corresponds to the current grapheme cluster. -// If the iterator is already past the end or [Graphemes.Next] has not yet been -// called, nil is returned. -func (g *Graphemes) Bytes() []byte { - if g.state < 0 { - return nil - } - return []byte(g.cluster) -} - -// Positions returns the interval of the current grapheme cluster as byte -// positions into the original string. The first returned value "from" indexes -// the first byte and the second returned value "to" indexes the first byte that -// is not included anymore, i.e. str[from:to] is the current grapheme cluster of -// the original string "str". If [Graphemes.Next] has not yet been called, both -// values are 0. If the iterator is already past the end, both values are 1. -func (g *Graphemes) Positions() (int, int) { - if g.state == -1 { - return 0, 0 - } else if g.state == -2 { - return 1, 1 - } - return g.offset, g.offset + len(g.cluster) -} - -// IsWordBoundary returns true if a word ends after the current grapheme -// cluster. -func (g *Graphemes) IsWordBoundary() bool { - if g.state < 0 { - return true - } - return g.boundaries&MaskWord != 0 -} - -// IsSentenceBoundary returns true if a sentence ends after the current -// grapheme cluster. -func (g *Graphemes) IsSentenceBoundary() bool { - if g.state < 0 { - return true - } - return g.boundaries&MaskSentence != 0 -} - -// LineBreak returns whether the line can be broken after the current grapheme -// cluster. A value of [LineDontBreak] means the line may not be broken, a value -// of [LineMustBreak] means the line must be broken, and a value of -// [LineCanBreak] means the line may or may not be broken. -func (g *Graphemes) LineBreak() int { - if g.state == -1 { - return LineDontBreak - } - if g.state == -2 { - return LineMustBreak - } - return g.boundaries & MaskLine -} - -// Width returns the monospace width of the current grapheme cluster. -func (g *Graphemes) Width() int { - if g.state < 0 { - return 0 - } - return g.boundaries >> ShiftWidth -} - -// Reset puts the iterator into its initial state such that the next call to -// [Graphemes.Next] sets it to the first grapheme cluster again. -func (g *Graphemes) Reset() { - g.state = -1 - g.offset = 0 - g.cluster = "" - g.remaining = g.original -} - -// GraphemeClusterCount returns the number of user-perceived characters -// (grapheme clusters) for the given string. -func GraphemeClusterCount(s string) (n int) { - state := -1 - for len(s) > 0 { - _, s, _, state = FirstGraphemeClusterInString(s, state) - n++ - } - return -} - -// ReverseString reverses the given string while observing grapheme cluster -// boundaries. -func ReverseString(s string) string { - str := []byte(s) - reversed := make([]byte, len(str)) - state := -1 - index := len(str) - for len(str) > 0 { - var cluster []byte - cluster, str, _, state = FirstGraphemeCluster(str, state) - index -= len(cluster) - copy(reversed[index:], cluster) - if index <= len(str)/2 { - break - } - } - return string(reversed) -} - -// The number of bits the grapheme property must be shifted to make place for -// grapheme states. -const shiftGraphemePropState = 4 - -// FirstGraphemeCluster returns the first grapheme cluster found in the given -// byte slice according to the rules of [Unicode Standard Annex #29, Grapheme -// Cluster Boundaries]. This function can be called continuously to extract all -// grapheme clusters from a byte slice, as illustrated in the example below. -// -// If you don't know the current state, for example when calling the function -// for the first time, you must pass -1. For consecutive calls, pass the state -// and rest slice returned by the previous call. -// -// The "rest" slice is the sub-slice of the original byte slice "b" starting -// after the last byte of the identified grapheme cluster. If the length of the -// "rest" slice is 0, the entire byte slice "b" has been processed. The -// "cluster" byte slice is the sub-slice of the input slice containing the -// identified grapheme cluster. -// -// The returned width is the width of the grapheme cluster for most monospace -// fonts where a value of 1 represents one character cell. -// -// Given an empty byte slice "b", the function returns nil values. -// -// While slightly less convenient than using the Graphemes class, this function -// has much better performance and makes no allocations. It lends itself well to -// large byte slices. -// -// [Unicode Standard Annex #29, Grapheme Cluster Boundaries]: http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries -func FirstGraphemeCluster(b []byte, state int) (cluster, rest []byte, width, newState int) { - // An empty byte slice returns nothing. - if len(b) == 0 { - return - } - - // Extract the first rune. - r, length := utf8.DecodeRune(b) - if len(b) <= length { // If we're already past the end, there is nothing else to parse. - var prop int - if state < 0 { - prop = propertyGraphemes(r) - } else { - prop = state >> shiftGraphemePropState - } - return b, nil, runeWidth(r, prop), grAny | (prop << shiftGraphemePropState) - } - - // If we don't know the state, determine it now. - var firstProp int - if state < 0 { - state, firstProp, _ = transitionGraphemeState(state, r) - } else { - firstProp = state >> shiftGraphemePropState - } - width += runeWidth(r, firstProp) - - // Transition until we find a boundary. - for { - var ( - prop int - boundary bool - ) - - r, l := utf8.DecodeRune(b[length:]) - state, prop, boundary = transitionGraphemeState(state&maskGraphemeState, r) - - if boundary { - return b[:length], b[length:], width, state | (prop << shiftGraphemePropState) - } - - if firstProp == prExtendedPictographic { - if r == vs15 { - width = 1 - } else if r == vs16 { - width = 2 - } - } else if firstProp != prRegionalIndicator && firstProp != prL { - width += runeWidth(r, prop) - } - - length += l - if len(b) <= length { - return b, nil, width, grAny | (prop << shiftGraphemePropState) - } - } -} - -// FirstGraphemeClusterInString is like [FirstGraphemeCluster] but its input and -// outputs are strings. -func FirstGraphemeClusterInString(str string, state int) (cluster, rest string, width, newState int) { - // An empty string returns nothing. - if len(str) == 0 { - return - } - - // Extract the first rune. - r, length := utf8.DecodeRuneInString(str) - if len(str) <= length { // If we're already past the end, there is nothing else to parse. - var prop int - if state < 0 { - prop = propertyGraphemes(r) - } else { - prop = state >> shiftGraphemePropState - } - return str, "", runeWidth(r, prop), grAny | (prop << shiftGraphemePropState) - } - - // If we don't know the state, determine it now. - var firstProp int - if state < 0 { - state, firstProp, _ = transitionGraphemeState(state, r) - } else { - firstProp = state >> shiftGraphemePropState - } - width += runeWidth(r, firstProp) - - // Transition until we find a boundary. - for { - var ( - prop int - boundary bool - ) - - r, l := utf8.DecodeRuneInString(str[length:]) - state, prop, boundary = transitionGraphemeState(state&maskGraphemeState, r) - - if boundary { - return str[:length], str[length:], width, state | (prop << shiftGraphemePropState) - } - - if firstProp == prExtendedPictographic { - if r == vs15 { - width = 1 - } else if r == vs16 { - width = 2 - } - } else if firstProp != prRegionalIndicator && firstProp != prL { - width += runeWidth(r, prop) - } - - length += l - if len(str) <= length { - return str, "", width, grAny | (prop << shiftGraphemePropState) - } - } -} |