diff options
Diffstat (limited to 'vendor/github.com/huandu/xstrings/translate.go')
-rw-r--r-- | vendor/github.com/huandu/xstrings/translate.go | 546 |
1 files changed, 546 insertions, 0 deletions
diff --git a/vendor/github.com/huandu/xstrings/translate.go b/vendor/github.com/huandu/xstrings/translate.go new file mode 100644 index 000000000..42e694fb1 --- /dev/null +++ b/vendor/github.com/huandu/xstrings/translate.go @@ -0,0 +1,546 @@ +// Copyright 2015 Huan Du. All rights reserved. +// Licensed under the MIT license that can be found in the LICENSE file. + +package xstrings + +import ( + "unicode" + "unicode/utf8" +) + +type runeRangeMap struct { + FromLo rune // Lower bound of range map. + FromHi rune // An inclusive higher bound of range map. + ToLo rune + ToHi rune +} + +type runeDict struct { + Dict [unicode.MaxASCII + 1]rune +} + +type runeMap map[rune]rune + +// Translator can translate string with pre-compiled from and to patterns. +// If a from/to pattern pair needs to be used more than once, it's recommended +// to create a Translator and reuse it. +type Translator struct { + quickDict *runeDict // A quick dictionary to look up rune by index. Only available for latin runes. + runeMap runeMap // Rune map for translation. + ranges []*runeRangeMap // Ranges of runes. + mappedRune rune // If mappedRune >= 0, all matched runes are translated to the mappedRune. + reverted bool // If to pattern is empty, all matched characters will be deleted. + hasPattern bool +} + +// NewTranslator creates new Translator through a from/to pattern pair. +func NewTranslator(from, to string) *Translator { + tr := &Translator{} + + if from == "" { + return tr + } + + reverted := from[0] == '^' + deletion := len(to) == 0 + + if reverted { + from = from[1:] + } + + var fromStart, fromEnd, fromRangeStep rune + var toStart, toEnd, toRangeStep rune + var fromRangeSize, toRangeSize rune + var singleRunes []rune + + // Update the to rune range. + updateRange := func() { + // No more rune to read in the to rune pattern. + if toEnd == utf8.RuneError { + return + } + + if toRangeStep == 0 { + to, toStart, toEnd, toRangeStep = nextRuneRange(to, toEnd) + return + } + + // Current range is not empty. Consume 1 rune from start. + if toStart != toEnd { + toStart += toRangeStep + return + } + + // No more rune. Repeat the last rune. + if to == "" { + toEnd = utf8.RuneError + return + } + + // Both start and end are used. Read two more runes from the to pattern. + to, toStart, toEnd, toRangeStep = nextRuneRange(to, utf8.RuneError) + } + + if deletion { + toStart = utf8.RuneError + toEnd = utf8.RuneError + } else { + // If from pattern is reverted, only the last rune in the to pattern will be used. + if reverted { + var size int + + for len(to) > 0 { + toStart, size = utf8.DecodeRuneInString(to) + to = to[size:] + } + + toEnd = utf8.RuneError + } else { + to, toStart, toEnd, toRangeStep = nextRuneRange(to, utf8.RuneError) + } + } + + fromEnd = utf8.RuneError + + for len(from) > 0 { + from, fromStart, fromEnd, fromRangeStep = nextRuneRange(from, fromEnd) + + // fromStart is a single character. Just map it with a rune in the to pattern. + if fromRangeStep == 0 { + singleRunes = tr.addRune(fromStart, toStart, singleRunes) + updateRange() + continue + } + + for toEnd != utf8.RuneError && fromStart != fromEnd { + // If mapped rune is a single character instead of a range, simply shift first + // rune in the range. + if toRangeStep == 0 { + singleRunes = tr.addRune(fromStart, toStart, singleRunes) + updateRange() + fromStart += fromRangeStep + continue + } + + fromRangeSize = (fromEnd - fromStart) * fromRangeStep + toRangeSize = (toEnd - toStart) * toRangeStep + + // Not enough runes in the to pattern. Need to read more. + if fromRangeSize > toRangeSize { + fromStart, toStart = tr.addRuneRange(fromStart, fromStart+toRangeSize*fromRangeStep, toStart, toEnd, singleRunes) + fromStart += fromRangeStep + updateRange() + + // Edge case: If fromRangeSize == toRangeSize + 1, the last fromStart value needs be considered + // as a single rune. + if fromStart == fromEnd { + singleRunes = tr.addRune(fromStart, toStart, singleRunes) + updateRange() + } + + continue + } + + fromStart, toStart = tr.addRuneRange(fromStart, fromEnd, toStart, toStart+fromRangeSize*toRangeStep, singleRunes) + updateRange() + break + } + + if fromStart == fromEnd { + fromEnd = utf8.RuneError + continue + } + + _, toStart = tr.addRuneRange(fromStart, fromEnd, toStart, toStart, singleRunes) + fromEnd = utf8.RuneError + } + + if fromEnd != utf8.RuneError { + tr.addRune(fromEnd, toStart, singleRunes) + } + + tr.reverted = reverted + tr.mappedRune = -1 + tr.hasPattern = true + + // Translate RuneError only if in deletion or reverted mode. + if deletion || reverted { + tr.mappedRune = toStart + } + + return tr +} + +func (tr *Translator) addRune(from, to rune, singleRunes []rune) []rune { + if from <= unicode.MaxASCII { + if tr.quickDict == nil { + tr.quickDict = &runeDict{} + } + + tr.quickDict.Dict[from] = to + } else { + if tr.runeMap == nil { + tr.runeMap = make(runeMap) + } + + tr.runeMap[from] = to + } + + singleRunes = append(singleRunes, from) + return singleRunes +} + +func (tr *Translator) addRuneRange(fromLo, fromHi, toLo, toHi rune, singleRunes []rune) (rune, rune) { + var r rune + var rrm *runeRangeMap + + if fromLo < fromHi { + rrm = &runeRangeMap{ + FromLo: fromLo, + FromHi: fromHi, + ToLo: toLo, + ToHi: toHi, + } + } else { + rrm = &runeRangeMap{ + FromLo: fromHi, + FromHi: fromLo, + ToLo: toHi, + ToHi: toLo, + } + } + + // If there is any single rune conflicts with this rune range, clear single rune record. + for _, r = range singleRunes { + if rrm.FromLo <= r && r <= rrm.FromHi { + if r <= unicode.MaxASCII { + tr.quickDict.Dict[r] = 0 + } else { + delete(tr.runeMap, r) + } + } + } + + tr.ranges = append(tr.ranges, rrm) + return fromHi, toHi +} + +func nextRuneRange(str string, last rune) (remaining string, start, end rune, rangeStep rune) { + var r rune + var size int + + remaining = str + escaping := false + isRange := false + + for len(remaining) > 0 { + r, size = utf8.DecodeRuneInString(remaining) + remaining = remaining[size:] + + // Parse special characters. + if !escaping { + if r == '\\' { + escaping = true + continue + } + + if r == '-' { + // Ignore slash at beginning of string. + if last == utf8.RuneError { + continue + } + + start = last + isRange = true + continue + } + } + + escaping = false + + if last != utf8.RuneError { + // This is a range which start and end are the same. + // Considier it as a normal character. + if isRange && last == r { + isRange = false + continue + } + + start = last + end = r + + if isRange { + if start < end { + rangeStep = 1 + } else { + rangeStep = -1 + } + } + + return + } + + last = r + } + + start = last + end = utf8.RuneError + return +} + +// Translate str with a from/to pattern pair. +// +// See comment in Translate function for usage and samples. +func (tr *Translator) Translate(str string) string { + if !tr.hasPattern || str == "" { + return str + } + + var r rune + var size int + var needTr bool + + orig := str + + var output *stringBuilder + + for len(str) > 0 { + r, size = utf8.DecodeRuneInString(str) + r, needTr = tr.TranslateRune(r) + + if needTr && output == nil { + output = allocBuffer(orig, str) + } + + if r != utf8.RuneError && output != nil { + output.WriteRune(r) + } + + str = str[size:] + } + + // No character is translated. + if output == nil { + return orig + } + + return output.String() +} + +// TranslateRune return translated rune and true if r matches the from pattern. +// If r doesn't match the pattern, original r is returned and translated is false. +func (tr *Translator) TranslateRune(r rune) (result rune, translated bool) { + switch { + case tr.quickDict != nil: + if r <= unicode.MaxASCII { + result = tr.quickDict.Dict[r] + + if result != 0 { + translated = true + + if tr.mappedRune >= 0 { + result = tr.mappedRune + } + + break + } + } + + fallthrough + + case tr.runeMap != nil: + var ok bool + + if result, ok = tr.runeMap[r]; ok { + translated = true + + if tr.mappedRune >= 0 { + result = tr.mappedRune + } + + break + } + + fallthrough + + default: + var rrm *runeRangeMap + ranges := tr.ranges + + for i := len(ranges) - 1; i >= 0; i-- { + rrm = ranges[i] + + if rrm.FromLo <= r && r <= rrm.FromHi { + translated = true + + if tr.mappedRune >= 0 { + result = tr.mappedRune + break + } + + if rrm.ToLo < rrm.ToHi { + result = rrm.ToLo + r - rrm.FromLo + } else if rrm.ToLo > rrm.ToHi { + // ToHi can be smaller than ToLo if range is from higher to lower. + result = rrm.ToLo - r + rrm.FromLo + } else { + result = rrm.ToLo + } + + break + } + } + } + + if tr.reverted { + if !translated { + result = tr.mappedRune + } + + translated = !translated + } + + if !translated { + result = r + } + + return +} + +// HasPattern returns true if Translator has one pattern at least. +func (tr *Translator) HasPattern() bool { + return tr.hasPattern +} + +// Translate str with the characters defined in from replaced by characters defined in to. +// +// From and to are patterns representing a set of characters. Pattern is defined as following. +// +// * Special characters +// * '-' means a range of runes, e.g. +// * "a-z" means all characters from 'a' to 'z' inclusive; +// * "z-a" means all characters from 'z' to 'a' inclusive. +// * '^' as first character means a set of all runes excepted listed, e.g. +// * "^a-z" means all characters except 'a' to 'z' inclusive. +// * '\' escapes special characters. +// * Normal character represents itself, e.g. "abc" is a set including 'a', 'b' and 'c'. +// +// Translate will try to find a 1:1 mapping from from to to. +// If to is smaller than from, last rune in to will be used to map "out of range" characters in from. +// +// Note that '^' only works in the from pattern. It will be considered as a normal character in the to pattern. +// +// If the to pattern is an empty string, Translate works exactly the same as Delete. +// +// Samples: +// Translate("hello", "aeiou", "12345") => "h2ll4" +// Translate("hello", "a-z", "A-Z") => "HELLO" +// Translate("hello", "z-a", "a-z") => "svool" +// Translate("hello", "aeiou", "*") => "h*ll*" +// Translate("hello", "^l", "*") => "**ll*" +// Translate("hello ^ world", `\^lo`, "*") => "he*** * w*r*d" +func Translate(str, from, to string) string { + tr := NewTranslator(from, to) + return tr.Translate(str) +} + +// Delete runes in str matching the pattern. +// Pattern is defined in Translate function. +// +// Samples: +// Delete("hello", "aeiou") => "hll" +// Delete("hello", "a-k") => "llo" +// Delete("hello", "^a-k") => "he" +func Delete(str, pattern string) string { + tr := NewTranslator(pattern, "") + return tr.Translate(str) +} + +// Count how many runes in str match the pattern. +// Pattern is defined in Translate function. +// +// Samples: +// Count("hello", "aeiou") => 3 +// Count("hello", "a-k") => 3 +// Count("hello", "^a-k") => 2 +func Count(str, pattern string) int { + if pattern == "" || str == "" { + return 0 + } + + var r rune + var size int + var matched bool + + tr := NewTranslator(pattern, "") + cnt := 0 + + for len(str) > 0 { + r, size = utf8.DecodeRuneInString(str) + str = str[size:] + + if _, matched = tr.TranslateRune(r); matched { + cnt++ + } + } + + return cnt +} + +// Squeeze deletes adjacent repeated runes in str. +// If pattern is not empty, only runes matching the pattern will be squeezed. +// +// Samples: +// Squeeze("hello", "") => "helo" +// Squeeze("hello", "m-z") => "hello" +// Squeeze("hello world", " ") => "hello world" +func Squeeze(str, pattern string) string { + var last, r rune + var size int + var skipSqueeze, matched bool + var tr *Translator + var output *stringBuilder + + orig := str + last = -1 + + if len(pattern) > 0 { + tr = NewTranslator(pattern, "") + } + + for len(str) > 0 { + r, size = utf8.DecodeRuneInString(str) + + // Need to squeeze the str. + if last == r && !skipSqueeze { + if tr != nil { + if _, matched = tr.TranslateRune(r); !matched { + skipSqueeze = true + } + } + + if output == nil { + output = allocBuffer(orig, str) + } + + if skipSqueeze { + output.WriteRune(r) + } + } else { + if output != nil { + output.WriteRune(r) + } + + last = r + skipSqueeze = false + } + + str = str[size:] + } + + if output == nil { + return orig + } + + return output.String() +} |