summaryrefslogtreecommitdiff
path: root/vendor/github.com/rivo/uniseg/sentencerules.go
diff options
context:
space:
mode:
authorLibravatar Vyr Cossont <VyrCossont@users.noreply.github.com>2025-01-23 16:47:30 -0800
committerLibravatar GitHub <noreply@github.com>2025-01-23 16:47:30 -0800
commit5b765d734ee70f0a8a0790444d60969a727567f8 (patch)
treef76e05a6e5b22df17160be595c40e964bdbe5f22 /vendor/github.com/rivo/uniseg/sentencerules.go
parent[feature] Serve bot accounts over AP as Service instead of Person (#3672) (diff)
downloadgotosocial-5b765d734ee70f0a8a0790444d60969a727567f8.tar.xz
[feature] Push notifications (#3587)
* Update push subscription API model to be Mastodon 4.0 compatible * Add webpush-go dependency # Conflicts: # go.sum * Single-row table for storing instance's VAPID key pair * Generate VAPID key pair during startup * Add VAPID public key to instance info API * Return VAPID public key when registering an app * Store Web Push subscriptions in DB * Add Web Push sender (similar to email sender) * Add no-op push senders to most processor tests * Test Web Push notifications from workers * Delete Web Push subscriptions when account is deleted * Implement push subscription API * Linter fixes * Update Swagger * Fix enum to int migration * Fix GetVAPIDKeyPair * Create web push subscriptions table with indexes * Log Web Push server error messages * Send instance URL as Web Push JWT subject * Accept any 2xx code as a success * Fix malformed VAPID sub claim * Use packed notification flags * Remove unused date columns * Add notification type for update notifications Not used yet * Make GetVAPIDKeyPair idempotent and remove PutVAPIDKeyPair * Post-rebase fixes * go mod tidy * Special-case 400 errors other than 408/429 Most client errors should remove the subscription. * Improve titles, trim body to reasonable length * Disallow cleartext HTTP for Web Push servers * Fix lint * Remove redundant index on unique column Also removes redundant unique and notnull tags on ID column since these are implied by pk * Make realsender.go more readable * Use Tobi's style for wrapping errors * Restore treating all 5xx codes as temporary problems * Always load target account settings * Stub `policy` and `standard` * webpush.Sender: take type converter as ctor param * Move webpush.MockSender and noopSender into testrig
Diffstat (limited to 'vendor/github.com/rivo/uniseg/sentencerules.go')
-rw-r--r--vendor/github.com/rivo/uniseg/sentencerules.go276
1 files changed, 276 insertions, 0 deletions
diff --git a/vendor/github.com/rivo/uniseg/sentencerules.go b/vendor/github.com/rivo/uniseg/sentencerules.go
new file mode 100644
index 000000000..0b29c7bdb
--- /dev/null
+++ b/vendor/github.com/rivo/uniseg/sentencerules.go
@@ -0,0 +1,276 @@
+package uniseg
+
+import "unicode/utf8"
+
+// The states of the sentence break parser. Each state records which left-hand context of a sentence break rule has been matched so far.
+const (
+ sbAny = iota // Default state; also used as the wildcard state in fallback lookups.
+ sbCR // After CR; a following LF is kept together with it (SB3).
+ sbParaSep // After a paragraph separator; the next code point starts a new sentence (SB4).
+ sbATerm // After ATerm.
+ sbUpper // After an Upper letter.
+ sbLower // After a Lower letter.
+ sbSB7 // After (Upper | Lower) ATerm, the left side of SB7.
+ sbSB8Close // After ATerm followed by one or more Close.
+ sbSB8Sp // After ATerm Close* followed by one or more Sp.
+ sbSTerm // After STerm.
+ sbSB8aClose // After STerm followed by one or more Close.
+ sbSB8aSp // After STerm Close* followed by one or more Sp.
+)
+
+// sbTransitions implements the sentence break parser's state transitions: given the current state and the sentence break property of the next code point, it returns the new state, whether a sentence break occurs at this transition, and the number of the rule that matched (the UAX #29 rule number scaled by ten, e.g. SB3 is 30, SB8a is 81, SB999 is 9990).
+// It's analogous to [grTransitions], see comments there for details. When no case matches exactly, it returns -1, false, -1 so that the caller can fall back to less specific lookups.
+//
+// Unicode version 15.0.0.
+func sbTransitions(state, prop int) (newState int, sentenceBreak bool, rule int) {
+ switch uint64(state) | uint64(prop)<<32 { // Pack the state (low 32 bits) and the property (high 32 bits) into a single switch key.
+ // SB3: CR × LF.
+ case sbAny | prCR<<32:
+ return sbCR, false, 9990 // 9990 stands for SB999 (no break); this entry only records the new state.
+ case sbCR | prLF<<32:
+ return sbParaSep, false, 30
+
+ // SB4: ParaSep ÷ (always break after a paragraph separator).
+ case sbAny | prSep<<32:
+ return sbParaSep, false, 9990
+ case sbAny | prLF<<32:
+ return sbParaSep, false, 9990
+ case sbParaSep | prAny<<32:
+ return sbAny, true, 40
+ case sbCR | prAny<<32:
+ return sbAny, true, 40
+
+ // SB6: ATerm × Numeric.
+ case sbAny | prATerm<<32:
+ return sbATerm, false, 9990
+ case sbATerm | prNumeric<<32:
+ return sbAny, false, 60
+ case sbSB7 | prNumeric<<32:
+ return sbAny, false, 60 // Because ATerm also appears in SB7.
+
+ // SB7: (Upper | Lower) ATerm × Upper.
+ case sbAny | prUpper<<32:
+ return sbUpper, false, 9990
+ case sbAny | prLower<<32:
+ return sbLower, false, 9990
+ case sbUpper | prATerm<<32:
+ return sbSB7, false, 70
+ case sbLower | prATerm<<32:
+ return sbSB7, false, 70
+ case sbSB7 | prUpper<<32:
+ return sbUpper, false, 70
+
+ // SB8a: SATerm Close* Sp* × (SContinue | SATerm).
+ case sbAny | prSTerm<<32:
+ return sbSTerm, false, 9990
+ case sbATerm | prSContinue<<32:
+ return sbAny, false, 81
+ case sbATerm | prATerm<<32:
+ return sbATerm, false, 81
+ case sbATerm | prSTerm<<32:
+ return sbSTerm, false, 81
+ case sbSB7 | prSContinue<<32:
+ return sbAny, false, 81
+ case sbSB7 | prATerm<<32:
+ return sbATerm, false, 81
+ case sbSB7 | prSTerm<<32:
+ return sbSTerm, false, 81
+ case sbSB8Close | prSContinue<<32:
+ return sbAny, false, 81
+ case sbSB8Close | prATerm<<32:
+ return sbATerm, false, 81
+ case sbSB8Close | prSTerm<<32:
+ return sbSTerm, false, 81
+ case sbSB8Sp | prSContinue<<32:
+ return sbAny, false, 81
+ case sbSB8Sp | prATerm<<32:
+ return sbATerm, false, 81
+ case sbSB8Sp | prSTerm<<32:
+ return sbSTerm, false, 81
+ case sbSTerm | prSContinue<<32:
+ return sbAny, false, 81
+ case sbSTerm | prATerm<<32:
+ return sbATerm, false, 81
+ case sbSTerm | prSTerm<<32:
+ return sbSTerm, false, 81
+ case sbSB8aClose | prSContinue<<32:
+ return sbAny, false, 81
+ case sbSB8aClose | prATerm<<32:
+ return sbATerm, false, 81
+ case sbSB8aClose | prSTerm<<32:
+ return sbSTerm, false, 81
+ case sbSB8aSp | prSContinue<<32:
+ return sbAny, false, 81
+ case sbSB8aSp | prATerm<<32:
+ return sbATerm, false, 81
+ case sbSB8aSp | prSTerm<<32:
+ return sbSTerm, false, 81
+
+ // SB9: SATerm Close* × (Close | Sp | ParaSep).
+ case sbATerm | prClose<<32:
+ return sbSB8Close, false, 90
+ case sbSB7 | prClose<<32:
+ return sbSB8Close, false, 90
+ case sbSB8Close | prClose<<32:
+ return sbSB8Close, false, 90
+ case sbATerm | prSp<<32:
+ return sbSB8Sp, false, 90
+ case sbSB7 | prSp<<32:
+ return sbSB8Sp, false, 90
+ case sbSB8Close | prSp<<32:
+ return sbSB8Sp, false, 90
+ case sbSTerm | prClose<<32:
+ return sbSB8aClose, false, 90
+ case sbSB8aClose | prClose<<32:
+ return sbSB8aClose, false, 90
+ case sbSTerm | prSp<<32:
+ return sbSB8aSp, false, 90
+ case sbSB8aClose | prSp<<32:
+ return sbSB8aSp, false, 90
+ case sbATerm | prSep<<32:
+ return sbParaSep, false, 90
+ case sbATerm | prCR<<32:
+ return sbParaSep, false, 90 // NOTE(review): CR leads to sbParaSep here (and in the other prCR cases of SB9/SB10), not sbCR, and there is no exact (sbParaSep, prLF) case; verify that CR LF after a terminator still satisfies SB3.
+ case sbATerm | prLF<<32:
+ return sbParaSep, false, 90
+ case sbSB7 | prSep<<32:
+ return sbParaSep, false, 90
+ case sbSB7 | prCR<<32:
+ return sbParaSep, false, 90
+ case sbSB7 | prLF<<32:
+ return sbParaSep, false, 90
+ case sbSB8Close | prSep<<32:
+ return sbParaSep, false, 90
+ case sbSB8Close | prCR<<32:
+ return sbParaSep, false, 90
+ case sbSB8Close | prLF<<32:
+ return sbParaSep, false, 90
+ case sbSTerm | prSep<<32:
+ return sbParaSep, false, 90
+ case sbSTerm | prCR<<32:
+ return sbParaSep, false, 90
+ case sbSTerm | prLF<<32:
+ return sbParaSep, false, 90
+ case sbSB8aClose | prSep<<32:
+ return sbParaSep, false, 90
+ case sbSB8aClose | prCR<<32:
+ return sbParaSep, false, 90
+ case sbSB8aClose | prLF<<32:
+ return sbParaSep, false, 90
+
+ // SB10: SATerm Close* Sp* × (Sp | ParaSep).
+ case sbSB8Sp | prSp<<32:
+ return sbSB8Sp, false, 100
+ case sbSB8aSp | prSp<<32:
+ return sbSB8aSp, false, 100 // NOTE(review): unlike sbSB8Sp, sbSB8aSp has no exact Sep/CR/LF cases below; confirm that the caller's fallback yields the SB10 result for them.
+ case sbSB8Sp | prSep<<32:
+ return sbParaSep, false, 100
+ case sbSB8Sp | prCR<<32:
+ return sbParaSep, false, 100
+ case sbSB8Sp | prLF<<32:
+ return sbParaSep, false, 100
+
+ // SB11: SATerm Close* Sp* ParaSep? ÷.
+ case sbATerm | prAny<<32:
+ return sbAny, true, 110
+ case sbSB7 | prAny<<32:
+ return sbAny, true, 110
+ case sbSB8Close | prAny<<32:
+ return sbAny, true, 110
+ case sbSB8Sp | prAny<<32:
+ return sbAny, true, 110
+ case sbSTerm | prAny<<32:
+ return sbAny, true, 110
+ case sbSB8aClose | prAny<<32:
+ return sbAny, true, 110
+ case sbSB8aSp | prAny<<32:
+ return sbAny, true, 110
+ // We'll always break after ParaSep due to SB4.
+
+ default:
+ return -1, false, -1 // No exact (state, property) entry; negative values signal "not found" to the caller.
+ }
+}
+
+// transitionSentenceBreakState determines the new state of the sentence break
+// parser given the current state and the next code point "r". It also returns
+// whether a sentence boundary was detected. If more than one code point is
+// needed to determine the new state (the SB8 lookahead), the text starting
+// after rune "r" is read from the byte slice "b" if it is not nil, and from
+// the string "str" otherwise.
+func transitionSentenceBreakState(state int, r rune, b []byte, str string) (newState int, sentenceBreak bool) {
+ // Determine the property of the next character.
+ nextProperty := property(sentenceBreakCodePoints, r)
+
+ // SB5 (Replacing Ignore Rules): Extend and Format code points leave the state unchanged.
+ if nextProperty == prExtend || nextProperty == prFormat {
+ if state == sbParaSep || state == sbCR {
+ return sbAny, true // Make sure we don't apply SB5 to SB3 or SB4.
+ }
+ if state < 0 {
+ return sbAny, true // SB1.
+ }
+ return state, false
+ }
+
+ // Find the applicable transition in the table.
+ var rule int
+ newState, sentenceBreak, rule = sbTransitions(state, nextProperty)
+ if newState < 0 {
+ // No specific transition found. Try the less specific ones.
+ anyPropState, anyPropProp, anyPropRule := sbTransitions(state, prAny)
+ anyStateState, anyStateProp, anyStateRule := sbTransitions(sbAny, nextProperty)
+ if anyPropState >= 0 && anyStateState >= 0 {
+ // Both apply. Take the new state from the property-specific entry and the break decision from whichever entry has the lower rule number (see comments for grTransitions).
+ newState, sentenceBreak, rule = anyStateState, anyStateProp, anyStateRule
+ if anyPropRule < anyStateRule {
+ sentenceBreak, rule = anyPropProp, anyPropRule
+ }
+ } else if anyPropState >= 0 {
+ // We only have a specific state.
+ newState, sentenceBreak, rule = anyPropState, anyPropProp, anyPropRule
+ // NOTE(review): an earlier remark here called this branch unreachable and
+ // referred to "okAnyState", which does not exist in this function. With the
+ // transition table as written the branch can be taken, e.g. (sbATerm, prAny)
+ // has an entry while (sbAny, prOLetter) does not.
+ } else if anyStateState >= 0 {
+ // We only have a specific property.
+ newState, sentenceBreak, rule = anyStateState, anyStateProp, anyStateRule
+ } else {
+ // No known transition. SB999: Any × Any.
+ newState, sentenceBreak, rule = sbAny, false, 9990
+ }
+ }
+
+ // SB8: ATerm Close* Sp* × ( ¬(OLetter | Upper | Lower | ParaSep | SATerm) )* Lower.
+ if rule > 80 && (state == sbATerm || state == sbSB8Close || state == sbSB8Sp || state == sbSB7) { // Only rules numbered above 80 can be overridden by SB8.
+ // Check the right side of the rule: skip ahead until a code point that ends the lookahead.
+ var length int
+ for nextProperty != prOLetter &&
+ nextProperty != prUpper &&
+ nextProperty != prLower &&
+ nextProperty != prSep &&
+ nextProperty != prCR &&
+ nextProperty != prLF &&
+ nextProperty != prATerm &&
+ nextProperty != prSTerm {
+ // Move on to the next rune.
+ if b != nil { // Byte slice version.
+ r, length = utf8.DecodeRune(b)
+ b = b[length:]
+ } else { // String version.
+ r, length = utf8.DecodeRuneInString(str)
+ str = str[length:]
+ }
+ if r == utf8.RuneError { // End of input or invalid UTF-8. NOTE(review): a literal U+FFFD in the text also compares equal and stops the lookahead.
+ break
+ }
+ nextProperty = property(sentenceBreakCodePoints, r)
+ }
+ if nextProperty == prLower { // The lookahead ended on a lowercase letter, so SB8 forbids a break here.
+ return sbLower, false
+ }
+ }
+
+ return // Returns the named results newState and sentenceBreak set above.
+}