diff options
Diffstat (limited to 'vendor/github.com/prometheus/common/model/metric.go')
-rw-r--r-- | vendor/github.com/prometheus/common/model/metric.go | 368 |
1 files changed, 361 insertions, 7 deletions
diff --git a/vendor/github.com/prometheus/common/model/metric.go b/vendor/github.com/prometheus/common/model/metric.go index 00804b7fe..0bd29b3a3 100644 --- a/vendor/github.com/prometheus/common/model/metric.go +++ b/vendor/github.com/prometheus/common/model/metric.go @@ -18,15 +18,84 @@ import ( "regexp" "sort" "strings" + "unicode/utf8" + + dto "github.com/prometheus/client_model/go" + "google.golang.org/protobuf/proto" ) var ( - // MetricNameRE is a regular expression matching valid metric - // names. Note that the IsValidMetricName function performs the same - // check but faster than a match with this regular expression. - MetricNameRE = regexp.MustCompile(`^[a-zA-Z_:][a-zA-Z0-9_:]*$`) + // NameValidationScheme determines the method of name validation to be used by + // all calls to IsValidMetricName() and LabelName IsValid(). Setting UTF-8 mode + // in isolation from other components that don't support UTF-8 may result in + // bugs or other undefined behavior. This value is intended to be set by + // UTF-8-aware binaries as part of their startup. To avoid need for locking, + // this value should be set once, ideally in an init(), before multiple + // goroutines are started. + NameValidationScheme = LegacyValidation + + // NameEscapingScheme defines the default way that names will be + // escaped when presented to systems that do not support UTF-8 names. If the + // Content-Type "escaping" term is specified, that will override this value. + NameEscapingScheme = ValueEncodingEscaping +) + +// ValidationScheme is a Go enum for determining how metric and label names will +// be validated by this library. +type ValidationScheme int + +const ( + // LegacyValidation is a setting that requirets that metric and label names + // conform to the original Prometheus character requirements described by + // MetricNameRE and LabelNameRE. + LegacyValidation ValidationScheme = iota + + // UTF8Validation only requires that metric and label names be valid UTF-8 + // strings. + UTF8Validation +) + +type EscapingScheme int + +const ( + // NoEscaping indicates that a name will not be escaped. Unescaped names that + // do not conform to the legacy validity check will use a new exposition + // format syntax that will be officially standardized in future versions. + NoEscaping EscapingScheme = iota + + // UnderscoreEscaping replaces all legacy-invalid characters with underscores. + UnderscoreEscaping + + // DotsEscaping is similar to UnderscoreEscaping, except that dots are + // converted to `_dot_` and pre-existing underscores are converted to `__`. + DotsEscaping + + // ValueEncodingEscaping prepends the name with `U__` and replaces all invalid + // characters with the unicode value, surrounded by underscores. Single + // underscores are replaced with double underscores. + ValueEncodingEscaping +) + +const ( + // EscapingKey is the key in an Accept or Content-Type header that defines how + // metric and label names that do not conform to the legacy character + // requirements should be escaped when being scraped by a legacy prometheus + // system. If a system does not explicitly pass an escaping parameter in the + // Accept header, the default NameEscapingScheme will be used. + EscapingKey = "escaping" + + // Possible values for Escaping Key: + AllowUTF8 = "allow-utf-8" // No escaping required. + EscapeUnderscores = "underscores" + EscapeDots = "dots" + EscapeValues = "values" ) +// MetricNameRE is a regular expression matching valid metric +// names. Note that the IsValidMetricName function performs the same +// check but faster than a match with this regular expression. +var MetricNameRE = regexp.MustCompile(`^[a-zA-Z_:][a-zA-Z0-9_:]*$`) + // A Metric is similar to a LabelSet, but the key difference is that a Metric is // a singleton and refers to one and only one stream of samples. type Metric LabelSet @@ -86,17 +155,302 @@ func (m Metric) FastFingerprint() Fingerprint { return LabelSet(m).FastFingerprint() } -// IsValidMetricName returns true iff name matches the pattern of MetricNameRE. +// IsValidMetricName returns true iff name matches the pattern of MetricNameRE +// for legacy names, and iff it's valid UTF-8 if the UTF8Validation scheme is +// selected. +func IsValidMetricName(n LabelValue) bool { + switch NameValidationScheme { + case LegacyValidation: + return IsValidLegacyMetricName(n) + case UTF8Validation: + if len(n) == 0 { + return false + } + return utf8.ValidString(string(n)) + default: + panic(fmt.Sprintf("Invalid name validation scheme requested: %d", NameValidationScheme)) + } +} + +// IsValidLegacyMetricName is similar to IsValidMetricName but always uses the +// legacy validation scheme regardless of the value of NameValidationScheme. // This function, however, does not use MetricNameRE for the check but a much // faster hardcoded implementation. -func IsValidMetricName(n LabelValue) bool { +func IsValidLegacyMetricName(n LabelValue) bool { if len(n) == 0 { return false } for i, b := range n { - if !((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || b == ':' || (b >= '0' && b <= '9' && i > 0)) { + if !isValidLegacyRune(b, i) { return false } } return true } + +// EscapeMetricFamily escapes the given metric names and labels with the given +// escaping scheme. Returns a new object that uses the same pointers to fields +// when possible and creates new escaped versions so as not to mutate the +// input. +func EscapeMetricFamily(v *dto.MetricFamily, scheme EscapingScheme) *dto.MetricFamily { + if v == nil { + return nil + } + + if scheme == NoEscaping { + return v + } + + out := &dto.MetricFamily{ + Help: v.Help, + Type: v.Type, + } + + // If the name is nil, copy as-is, don't try to escape. + if v.Name == nil || IsValidLegacyMetricName(LabelValue(v.GetName())) { + out.Name = v.Name + } else { + out.Name = proto.String(EscapeName(v.GetName(), scheme)) + } + for _, m := range v.Metric { + if !metricNeedsEscaping(m) { + out.Metric = append(out.Metric, m) + continue + } + + escaped := &dto.Metric{ + Gauge: m.Gauge, + Counter: m.Counter, + Summary: m.Summary, + Untyped: m.Untyped, + Histogram: m.Histogram, + TimestampMs: m.TimestampMs, + } + + for _, l := range m.Label { + if l.GetName() == MetricNameLabel { + if l.Value == nil || IsValidLegacyMetricName(LabelValue(l.GetValue())) { + escaped.Label = append(escaped.Label, l) + continue + } + escaped.Label = append(escaped.Label, &dto.LabelPair{ + Name: proto.String(MetricNameLabel), + Value: proto.String(EscapeName(l.GetValue(), scheme)), + }) + continue + } + if l.Name == nil || IsValidLegacyMetricName(LabelValue(l.GetName())) { + escaped.Label = append(escaped.Label, l) + continue + } + escaped.Label = append(escaped.Label, &dto.LabelPair{ + Name: proto.String(EscapeName(l.GetName(), scheme)), + Value: l.Value, + }) + } + out.Metric = append(out.Metric, escaped) + } + return out +} + +func metricNeedsEscaping(m *dto.Metric) bool { + for _, l := range m.Label { + if l.GetName() == MetricNameLabel && !IsValidLegacyMetricName(LabelValue(l.GetValue())) { + return true + } + if !IsValidLegacyMetricName(LabelValue(l.GetName())) { + return true + } + } + return false +} + +const ( + lowerhex = "0123456789abcdef" +) + +// EscapeName escapes the incoming name according to the provided escaping +// scheme. Depending on the rules of escaping, this may cause no change in the +// string that is returned. (Especially NoEscaping, which by definition is a +// noop). This function does not do any validation of the name. +func EscapeName(name string, scheme EscapingScheme) string { + if len(name) == 0 { + return name + } + var escaped strings.Builder + switch scheme { + case NoEscaping: + return name + case UnderscoreEscaping: + if IsValidLegacyMetricName(LabelValue(name)) { + return name + } + for i, b := range name { + if isValidLegacyRune(b, i) { + escaped.WriteRune(b) + } else { + escaped.WriteRune('_') + } + } + return escaped.String() + case DotsEscaping: + // Do not early return for legacy valid names, we still escape underscores. + for i, b := range name { + if b == '_' { + escaped.WriteString("__") + } else if b == '.' { + escaped.WriteString("_dot_") + } else if isValidLegacyRune(b, i) { + escaped.WriteRune(b) + } else { + escaped.WriteRune('_') + } + } + return escaped.String() + case ValueEncodingEscaping: + if IsValidLegacyMetricName(LabelValue(name)) { + return name + } + escaped.WriteString("U__") + for i, b := range name { + if isValidLegacyRune(b, i) { + escaped.WriteRune(b) + } else if !utf8.ValidRune(b) { + escaped.WriteString("_FFFD_") + } else if b < 0x100 { + escaped.WriteRune('_') + for s := 4; s >= 0; s -= 4 { + escaped.WriteByte(lowerhex[b>>uint(s)&0xF]) + } + escaped.WriteRune('_') + } else if b < 0x10000 { + escaped.WriteRune('_') + for s := 12; s >= 0; s -= 4 { + escaped.WriteByte(lowerhex[b>>uint(s)&0xF]) + } + escaped.WriteRune('_') + } + } + return escaped.String() + default: + panic(fmt.Sprintf("invalid escaping scheme %d", scheme)) + } +} + +// lower function taken from strconv.atoi +func lower(c byte) byte { + return c | ('x' - 'X') +} + +// UnescapeName unescapes the incoming name according to the provided escaping +// scheme if possible. Some schemes are partially or totally non-roundtripable. +// If any error is enountered, returns the original input. +func UnescapeName(name string, scheme EscapingScheme) string { + if len(name) == 0 { + return name + } + switch scheme { + case NoEscaping: + return name + case UnderscoreEscaping: + // It is not possible to unescape from underscore replacement. + return name + case DotsEscaping: + name = strings.ReplaceAll(name, "_dot_", ".") + name = strings.ReplaceAll(name, "__", "_") + return name + case ValueEncodingEscaping: + escapedName, found := strings.CutPrefix(name, "U__") + if !found { + return name + } + + var unescaped strings.Builder + TOP: + for i := 0; i < len(escapedName); i++ { + // All non-underscores are treated normally. + if escapedName[i] != '_' { + unescaped.WriteByte(escapedName[i]) + continue + } + i++ + if i >= len(escapedName) { + return name + } + // A double underscore is a single underscore. + if escapedName[i] == '_' { + unescaped.WriteByte('_') + continue + } + // We think we are in a UTF-8 code, process it. + var utf8Val uint + for j := 0; i < len(escapedName); j++ { + // This is too many characters for a utf8 value. + if j > 4 { + return name + } + // Found a closing underscore, convert to a rune, check validity, and append. + if escapedName[i] == '_' { + utf8Rune := rune(utf8Val) + if !utf8.ValidRune(utf8Rune) { + return name + } + unescaped.WriteRune(utf8Rune) + continue TOP + } + r := lower(escapedName[i]) + utf8Val *= 16 + if r >= '0' && r <= '9' { + utf8Val += uint(r) - '0' + } else if r >= 'a' && r <= 'f' { + utf8Val += uint(r) - 'a' + 10 + } else { + return name + } + i++ + } + // Didn't find closing underscore, invalid. + return name + } + return unescaped.String() + default: + panic(fmt.Sprintf("invalid escaping scheme %d", scheme)) + } +} + +func isValidLegacyRune(b rune, i int) bool { + return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || b == ':' || (b >= '0' && b <= '9' && i > 0) +} + +func (e EscapingScheme) String() string { + switch e { + case NoEscaping: + return AllowUTF8 + case UnderscoreEscaping: + return EscapeUnderscores + case DotsEscaping: + return EscapeDots + case ValueEncodingEscaping: + return EscapeValues + default: + panic(fmt.Sprintf("unknown format scheme %d", e)) + } +} + +func ToEscapingScheme(s string) (EscapingScheme, error) { + if s == "" { + return NoEscaping, fmt.Errorf("got empty string instead of escaping scheme") + } + switch s { + case AllowUTF8: + return NoEscaping, nil + case EscapeUnderscores: + return UnderscoreEscaping, nil + case EscapeDots: + return DotsEscaping, nil + case EscapeValues: + return ValueEncodingEscaping, nil + default: + return NoEscaping, fmt.Errorf("unknown format scheme " + s) + } +} |