author | Alberto Bertogli
<albertito@blitiri.com.ar> 2020-08-19 19:42:28 UTC |
committer | Alberto Bertogli
<albertito@blitiri.com.ar> 2020-08-21 11:07:33 UTC |
parent | 7e412db19b4a93146368af3c5740d570207754a6 |
chasquid.go | +3 | -1 |
docs/monitoring.md | +8 | -115 |
internal/aliases/aliases.go | +3 | -2 |
internal/courier/smtp.go | +10 | -6 |
internal/expvarom/expvarom.go | +224 | -0 |
internal/expvarom/expvarom_test.go | +127 | -0 |
internal/queue/queue.go | +9 | -5 |
internal/smtpsrv/conn.go | +15 | -8 |
internal/sts/sts.go | +26 | -15 |
monitoring.go | +8 | -2 |
test/cover.sh | +11 | -4 |
test/t-09-loop/run.sh | +7 | -0 |
diff --git a/chasquid.go b/chasquid.go index dbd16a6..a1d195d 100644 --- a/chasquid.go +++ b/chasquid.go @@ -22,6 +22,7 @@ import ( "blitiri.com.ar/go/chasquid/internal/config" "blitiri.com.ar/go/chasquid/internal/courier" "blitiri.com.ar/go/chasquid/internal/dovecot" + "blitiri.com.ar/go/chasquid/internal/expvarom" "blitiri.com.ar/go/chasquid/internal/maillog" "blitiri.com.ar/go/chasquid/internal/normalize" "blitiri.com.ar/go/chasquid/internal/smtpsrv" @@ -52,7 +53,8 @@ var ( sourceDate time.Time sourceDateVar = expvar.NewString("chasquid/sourceDateStr") - sourceDateTsVar = expvar.NewInt("chasquid/sourceDateTimestamp") + sourceDateTsVar = expvarom.NewInt("chasquid/sourceDateTimestamp", + "timestamp when the binary was built, in seconds since epoch") ) func main() { diff --git a/docs/monitoring.md b/docs/monitoring.md index 0582e7d..731c079 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -23,11 +23,16 @@ These include: ## Variables chasquid exports some variables for monitoring, via the standard -[expvar](https://golang.org/pkg/expvar/) package, which can be useful for +[expvar](https://golang.org/pkg/expvar/) package and the +[OpenMetrics](https://openmetrics.io/) text format, which can be useful for whitebox monitoring. -They're accessible over the monitoring http server, at `/debug/vars` (default -endpoint for expvars). +They're accessible on the monitoring HTTP server, at `/debug/vars` (default +endpoint for expvars) and `/metrics` (common endpoint for openmetrics). + +<a name="prometheus"></a> +The `/metrics` endpoint is also compatible with +[Prometheus](https://prometheus.io/). *Note these are still subject to change, although breaking changes will be avoided whenever possible, and will be noted in the [release @@ -91,115 +96,3 @@ List of exported variables: - **chasquid/sts/cache/unmarshalErrors** (counter): count of unmarshaling errors as part of keeping the STS cache. - **chasquid/version** (string): version string. 
- - -## Prometheus - -To monitor chasquid using [Prometheus](https://prometheus.io), you can use the -[prometheus-expvar-exporter](https://blitiri.com.ar/git/r/prometheus-expvar-exporter/b/master/t/f=README.md.html) -with the following configuration: - -```toml -# Address to listen on. Prometheus should be told to scrape this. -listen_addr = ":8000" - -[chasquid] -# Replace with the address of chasquid's monitoring server. -url = "http://localhost:1099/debug/vars" - -# Metrics are auto-imported, but some can't be; in particular the ones with -# labels need explicit definitions here. - -m.aliases_hook_results.expvar ="chasquid/aliases/hookResults" -m.aliases_hook_results.help ="aliases hook results" -m.aliases_hook_results.label_name ="result" - -m.deliver_attempts.expvar = "chasquid/queue/deliverAttempts" -m.deliver_attempts.help = "attempts to deliver mail" -m.deliver_attempts.label_name = "recipient_type" - -m.dsn_queued.expvar = "chasquid/queue/dsnQueued" -m.dsn_queued.help = "DSN queued" - -m.items_written.expvar = "chasquid/queue/itemsWritten" -m.items_written.help = "items written" - -m.queue_puts.expvar = "chasquid/queue/putCount" -m.queue_puts.help = "chasquid/queue/putCount" - -m.smtpin_commands.expvar = "chasquid/smtpIn/commandCount" -m.smtpin_commands.help = "incoming SMTP command count" -m.smtpin_commands.label_name = "command" - -m.smtp_hook_results.expvar = "chasquid/smtpIn/hookResults" -m.smtp_hook_results.help = "hook invocation results" -m.smtp_hook_results.label_name = "result" - -m.loops_detected.expvar = "chasquid/smtpIn/loopsDetected" -m.loops_detected.help = "loops detected" - -m.smtp_response_codes.expvar = "chasquid/smtpIn/responseCodeCount" -m.smtp_response_codes.help = "response codes returned to SMTP commands" -m.smtp_response_codes.label_name = "code" - -m.in_sec_level_checks.expvar = "chasquid/smtpIn/securityLevelChecks" -m.in_sec_level_checks.help = "incoming security level check results" -m.in_sec_level_checks.label_name = "result" - 
-m.spf_results.expvar = "chasquid/smtpIn/spfResultCount" -m.spf_results.help = "SPF result count" -m.spf_results.label_name = "result" - -m.in_tls_usage.expvar = "chasquid/smtpIn/tlsCount" -m.in_tls_usage.help = "count of TLS usage in incoming connections" -m.in_tls_usage.label_name = "status" - -m.out_sec_level_checks.expvar = "chasquid/smtpOut/securityLevelChecks" -m.out_sec_level_checks.help = "outgoing security level check results" -m.out_sec_level_checks.label_name = "result" - -m.sts_modes.expvar = "chasquid/smtpOut/sts/mode" -m.sts_modes.help = "STS checks on outgoing connections, by mode" -m.sts_modes.label_name = "mode" - -m.sts_security.expvar = "chasquid/smtpOut/sts/security" -m.sts_security.help = "STS security checks on outgoing connections, by result" -m.sts_security.label_name = "result" - -m.out_tls_usage.expvar = "chasquid/smtpOut/tlsCount" -m.out_tls_usage.help = "count of TLS usage in outgoing connections" -m.out_tls_usage.label_name = "status" - -m.sts_cache_expired.expvar = "chasquid/sts/cache/expired" -m.sts_cache_expired.help = "expired entries in the STS cache" - -m.sts_cache_failed_fetch.expvar = "chasquid/sts/cache/failedFetch" -m.sts_cache_failed_fetch.help = "failed fetches in the STS cache" - -m.sts_cache_fetches.expvar = "chasquid/sts/cache/fetches" -m.sts_cache_fetches.help = "total fetches in the STS cache" - -m.sts_cache_hits.expvar = "chasquid/sts/cache/hits" -m.sts_cache_hits.help = "hits in the STS cache" - -m.sts_cache_invalid.expvar = "chasquid/sts/cache/invalid" -m.sts_cache_invalid.help = "invalid policies in the STS cache" - -m.sts_cache_io_errors.expvar = "chasquid/sts/cache/ioErrors" -m.sts_cache_io_errors.help = "I/O errors when maintaining STS cache" - -m.sts_cache_marshal_errors.expvar = "chasquid/sts/cache/marshalErrors" -m.sts_cache_marshal_errors.help = "marshalling errors when maintaining STS cache" - -m.sts_cache_refresh_cycles.expvar = "chasquid/sts/cache/refreshCycles" -m.sts_cache_refresh_cycles.help = "STS 
cache refresh cycles" - -m.sts_cache_refresh_errors.expvar = "chasquid/sts/cache/refreshErrors" -m.sts_cache_refresh_errors.help = "STS cache refresh errors" - -m.sts_cache_refreshes.expvar = "chasquid/sts/cache/refreshes" -m.sts_cache_refreshes.help = "count of STS cache refreshes" - -m.sts_cache_unmarshal_errors.expvar = "chasquid/sts/cache/unmarshalErrors" -m.sts_cache_unmarshal_errors.help = "unmarshalling errors in STS cache" -``` diff --git a/internal/aliases/aliases.go b/internal/aliases/aliases.go index fc29951..897659f 100644 --- a/internal/aliases/aliases.go +++ b/internal/aliases/aliases.go @@ -56,7 +56,6 @@ package aliases import ( "bufio" "context" - "expvar" "fmt" "io" "os" @@ -66,13 +65,15 @@ import ( "time" "blitiri.com.ar/go/chasquid/internal/envelope" + "blitiri.com.ar/go/chasquid/internal/expvarom" "blitiri.com.ar/go/chasquid/internal/normalize" "blitiri.com.ar/go/chasquid/internal/trace" ) // Exported variables. var ( - hookResults = expvar.NewMap("chasquid/aliases/hookResults") + hookResults = expvarom.NewMap("chasquid/aliases/hookResults", + "result", "count of aliases hook results, by hook and result") ) // Recipient represents a single recipient, after resolving aliases. diff --git a/internal/courier/smtp.go b/internal/courier/smtp.go index f09e100..6a93c15 100644 --- a/internal/courier/smtp.go +++ b/internal/courier/smtp.go @@ -3,7 +3,6 @@ package courier import ( "context" "crypto/tls" - "expvar" "flag" "net" "time" @@ -12,6 +11,7 @@ import ( "blitiri.com.ar/go/chasquid/internal/domaininfo" "blitiri.com.ar/go/chasquid/internal/envelope" + "blitiri.com.ar/go/chasquid/internal/expvarom" "blitiri.com.ar/go/chasquid/internal/smtp" "blitiri.com.ar/go/chasquid/internal/sts" "blitiri.com.ar/go/chasquid/internal/trace" @@ -35,11 +35,15 @@ var ( // Exported variables. 
var ( - tlsCount = expvar.NewMap("chasquid/smtpOut/tlsCount") - slcResults = expvar.NewMap("chasquid/smtpOut/securityLevelChecks") - - stsSecurityModes = expvar.NewMap("chasquid/smtpOut/sts/mode") - stsSecurityResults = expvar.NewMap("chasquid/smtpOut/sts/security") + tlsCount = expvarom.NewMap("chasquid/smtpOut/tlsCount", + "result", "count of TLS status on outgoing connections") + slcResults = expvarom.NewMap("chasquid/smtpOut/securityLevelChecks", + "result", "count of security level checks on outgoing connections") + + stsSecurityModes = expvarom.NewMap("chasquid/smtpOut/sts/mode", + "mode", "count of STS checks on outgoing connections") + stsSecurityResults = expvarom.NewMap("chasquid/smtpOut/sts/security", + "result", "count of STS security checks on outgoing connections") ) // SMTP delivers remote mail via outgoing SMTP. diff --git a/internal/expvarom/expvarom.go b/internal/expvarom/expvarom.go new file mode 100644 index 0000000..1d02fd7 --- /dev/null +++ b/internal/expvarom/expvarom.go @@ -0,0 +1,224 @@ +// Package expvarom implements an OpenMetrics HTTP exporter for the variables +// from the expvar package. +// +// This is useful for small servers that want to support both packages with +// simple enough variables, without introducing any dependencies beyond the +// standard library. +// +// Some functions to add descriptions and map labels are exported for +// convenience, but their usage is optional. +// +// For more complex usage (like histograms, counters vs. gauges, etc.), use +// the OpenMetrics libraries directly. +// +// The exporter uses the text-based format, as documented in: +// https://prometheus.io/docs/instrumenting/exposition_formats/#text-based-format +// +// Note the adoption of that format as OpenMetrics' one isn't finalized yet, +// and it is possible that it will change in the future. +// +// Backwards compatibility is NOT guaranteed, until the format is fully +// standardized. 
+package expvarom + +import ( + "expvar" + "fmt" + "io" + "net/http" + "sort" + "strconv" + "strings" + "sync" + "unicode/utf8" +) + +type exportedVar struct { + Name string + Desc string + LabelName string + + I *expvar.Int + F *expvar.Float + M *expvar.Map +} + +var ( + infoMu = sync.Mutex{} + descriptions = map[string]string{} + mapLabelNames = map[string]string{} +) + +// MetricsHandler implements an http.HandlerFunc which serves the registered +// metrics, using the OpenMetrics text-based format. +func MetricsHandler(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/plain; version=0.0.4") + + vars := []exportedVar{} + ignored := []string{} + expvar.Do(func(kv expvar.KeyValue) { + evar := exportedVar{ + Name: metricNameToOM(kv.Key), + } + switch value := kv.Value.(type) { + case *expvar.Int: + evar.I = value + case *expvar.Float: + evar.F = value + case *expvar.Map: + evar.M = value + default: + // Unsupported type, ignore this variable. + ignored = append(ignored, evar.Name) + return + } + + infoMu.Lock() + evar.Desc = descriptions[kv.Key] + evar.LabelName = mapLabelNames[kv.Key] + infoMu.Unlock() + + // OM maps need a label name, while expvar ones do not. If we weren't + // told what to use, use a generic "key". + if evar.LabelName == "" { + evar.LabelName = "key" + } + + vars = append(vars, evar) + }) + + // Sort the variables for reproducibility and readability. 
+ sort.Slice(vars, func(i, j int) bool { + return vars[i].Name < vars[j].Name + }) + + for _, v := range vars { + writeVar(w, &v) + } + + fmt.Fprintf(w, "# Generated by expvarom\n") + fmt.Fprintf(w, "# EXPERIMENTAL - Format is not fully standard yet\n") + fmt.Fprintf(w, "# Ignored variables: %q\n", ignored) +} + +func writeVar(w io.Writer, v *exportedVar) { + if v.Desc != "" { + fmt.Fprintf(w, "# HELP %s %s\n", v.Name, v.Desc) + } + + if v.I != nil { + fmt.Fprintf(w, "%s %d\n\n", v.Name, v.I.Value()) + return + } + + if v.F != nil { + fmt.Fprintf(w, "%s %g\n\n", v.Name, v.F.Value()) + return + } + + if v.M != nil { + count := 0 + v.M.Do(func(kv expvar.KeyValue) { + vs := "" + switch value := kv.Value.(type) { + case *expvar.Int: + vs = strconv.FormatInt(value.Value(), 10) + case *expvar.Float: + vs = strconv.FormatFloat(value.Value(), 'g', -1, 64) + default: + // We only support Int and Float in maps. + return + } + + labelValue := quoteLabelValue(kv.Key) + + fmt.Fprintf(w, "%s{%s=%s} %s\n", + v.Name, v.LabelName, labelValue, vs) + count++ + }) + if count > 0 { + fmt.Fprintf(w, "\n") + } + } +} + +// metricNameToOM converts an expvar metric name into an OpenMetrics-compliant +// metric name. The latter is more restrictive, as it must match the regexp +// "[a-zA-Z_:][a-zA-Z0-9_:]*", AND the ':' is not allowed for a direct +// exporter. +// +// https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels +func metricNameToOM(name string) string { + n := "" + for _, c := range name { + if (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + c == '_' { + n += string(c) + } else { + n += "_" + } + } + + // If it begins with a number, prepend 'i' as a compromise. + if len(n) > 0 && n[0] >= '0' && n[0] <= '9' { + n = "i" + n + } + + return n +} + +// According to the spec, we only need to replace these 3 characters in label +// values. 
+var labelValueReplacer = strings.NewReplacer( + `\`, `\\`, + `"`, `\"`, + "\n", `\n`) + +// quoteLabelValue takes an arbitrary string, and quotes it so it can be +// used as a label value. Output includes the wrapping `"`. +func quoteLabelValue(v string) string { + // The spec requires label values to be valid UTF8, with `\`, `"` and "\n" + // escaped. If it's invalid UTF8, hard-quote it first. This will result + // in uglier looking values, but they will be well formed. + if !utf8.ValidString(v) { + v = strconv.QuoteToASCII(v) + v = v[1 : len(v)-1] + } + + return `"` + labelValueReplacer.Replace(v) + `"` +} + +// NewInt registers a new expvar.Int variable, with the given description. +func NewInt(name, desc string) *expvar.Int { + infoMu.Lock() + descriptions[name] = desc + infoMu.Unlock() + return expvar.NewInt(name) +} + +// NewFloat registers a new expvar.Float variable, with the given description. +func NewFloat(name, desc string) *expvar.Float { + infoMu.Lock() + descriptions[name] = desc + infoMu.Unlock() + return expvar.NewFloat(name) +} + +// NewMap registers a new expvar.Map variable, with the given label +// name and description. +func NewMap(name, labelName, desc string) *expvar.Map { + // Prevent accidents when using the description as the label name. + if strings.Contains(labelName, " ") { + panic(fmt.Sprintf( + "label name has spaces, mix up with the description? 
%q", + labelName)) + } + + infoMu.Lock() + descriptions[name] = desc + mapLabelNames[name] = labelName + infoMu.Unlock() + return expvar.NewMap(name) +} diff --git a/internal/expvarom/expvarom_test.go b/internal/expvarom/expvarom_test.go new file mode 100644 index 0000000..4ce3c1e --- /dev/null +++ b/internal/expvarom/expvarom_test.go @@ -0,0 +1,127 @@ +package expvarom + +import ( + "expvar" + "io/ioutil" + "net/http/httptest" + "testing" + + "github.com/google/go-cmp/cmp" +) + +var ( + testI1 = NewInt("testI1", "int test var") + testI2 = expvar.NewInt("testI2") + + testF = NewFloat("testF", "float test var") + + testMI = NewMap("testMI", "label", "int map test var") + testMF = NewMap("testMF", "label", "float map test var") + testMXI = expvar.NewMap("testMXI") + testMXF = expvar.NewMap("testMXF") + + testMEmpty = expvar.NewMap("testMEmpty") //nolint // Unused. + + testMOther = expvar.NewMap("testMOther") + + testS = expvar.NewString("testS") + + // Naming test cases. + testN1 = expvar.NewInt("name/1z") + testN2 = NewInt("name$2", "name with $") + testN3 = expvar.NewInt("3name") + testN4 = expvar.NewInt("nAme_4Z") + testN5 = expvar.NewInt("ñame_5") +) + +const expected string = `_ame_5 5 + +i3name 3 + +nAme_4Z 4 + +name_1z 1 + +# HELP name_2 name with $ +name_2 2 + +# HELP testF float test var +testF 3.43434 + +# HELP testI1 int test var +testI1 1 + +testI2 2 + +# HELP testMF float map test var +testMF{label="key2.0"} 6.6 +testMF{label="key2.1"} 6.61 +testMF{label="key2.2-ñaca"} 6.62 +testMF{label="key2.3-a\\b"} 6.63 +testMF{label="key2.4- "} 6.64 +testMF{label="key2.5-a\nb"} 6.65 +testMF{label="key2.6-a\"b"} 6.66 +testMF{label="key2.7-\\u00f1aca-A\\t\\xff\\xfe\\xfdB"} 6.67 + +# HELP testMI int map test var +testMI{label="key1"} 5 + +testMXF{key="key4"} 8e-08 + +testMXI{key="key3"} 7 + +# Generated by expvarom +# EXPERIMENTAL - Format is not fully standard yet +# Ignored variables: ["cmdline" "memstats" "testS"] +` + +func TestHandler(t *testing.T) { + 
testI1.Add(1) + testI2.Add(2) + testF.Add(3.43434) + testMI.Add("key1", 5) + + // Test some strange keys in this map to check they're escaped properly. + testMF.AddFloat("key2.0", 6.60) + testMF.AddFloat("key2.1", 6.61) + testMF.AddFloat("key2.2-ñaca", 6.62) + testMF.AddFloat(`key2.3-a\b`, 6.63) + testMF.AddFloat("key2.4-\t", 6.64) + testMF.AddFloat("key2.5-a\nb", 6.65) + testMF.AddFloat(`key2.6-a"b`, 6.66) + testMF.AddFloat("key2.7-ñaca-A\t\xff\xfe\xfdB", 6.67) // Invalid utf8. + + testMXI.Add("key3", 7) + testMXF.AddFloat("key4", 8e-8) + testS.Set("lalala") + + testN1.Add(1) + testN2.Add(2) + testN3.Add(3) + testN4.Add(4) + testN5.Add(5) + + // Map with an unsupported type. + testMOther.Set("keyX", &expvar.String{}) + + req := httptest.NewRequest("get", "/metrics", nil) + w := httptest.NewRecorder() + MetricsHandler(w, req) + + resp := w.Result() + body, _ := ioutil.ReadAll(resp.Body) + + if diff := cmp.Diff(expected, string(body)); diff != "" { + t.Errorf("MetricsHandler() mismatch (-want +got):\n%s", diff) + } +} + +func TestMapLabelAccident(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Errorf("NewMap did not panic as expected") + } + }() + + NewMap("name", "label with spaces", "description") +} diff --git a/internal/queue/queue.go b/internal/queue/queue.go index 46e5108..78700d9 100644 --- a/internal/queue/queue.go +++ b/internal/queue/queue.go @@ -8,7 +8,6 @@ package queue import ( "context" "encoding/base64" - "expvar" "fmt" "math/rand" "os" @@ -23,6 +22,7 @@ import ( "blitiri.com.ar/go/chasquid/internal/aliases" "blitiri.com.ar/go/chasquid/internal/courier" "blitiri.com.ar/go/chasquid/internal/envelope" + "blitiri.com.ar/go/chasquid/internal/expvarom" "blitiri.com.ar/go/chasquid/internal/maillog" "blitiri.com.ar/go/chasquid/internal/protoio" "blitiri.com.ar/go/chasquid/internal/set" @@ -54,10 +54,14 @@ var ( // Exported variables. 
var ( - putCount = expvar.NewInt("chasquid/queue/putCount") - itemsWritten = expvar.NewInt("chasquid/queue/itemsWritten") - dsnQueued = expvar.NewInt("chasquid/queue/dsnQueued") - deliverAttempts = expvar.NewMap("chasquid/queue/deliverAttempts") + putCount = expvarom.NewInt("chasquid/queue/putCount", + "count of envelopes attempted to be put in the queue") + itemsWritten = expvarom.NewInt("chasquid/queue/itemsWritten", + "count of items the queue wrote to disk") + dsnQueued = expvarom.NewInt("chasquid/queue/dsnQueued", + "count of DSNs that we generated (queued)") + deliverAttempts = expvarom.NewMap("chasquid/queue/deliverAttempts", + "recipient_type", "attempts to deliver mail, by recipient type") ) // Channel used to get random IDs for items in the queue. diff --git a/internal/smtpsrv/conn.go b/internal/smtpsrv/conn.go index eef67d4..a35677f 100644 --- a/internal/smtpsrv/conn.go +++ b/internal/smtpsrv/conn.go @@ -5,7 +5,6 @@ import ( "bytes" "context" "crypto/tls" - "expvar" "flag" "fmt" "io" @@ -25,6 +24,7 @@ import ( "blitiri.com.ar/go/chasquid/internal/auth" "blitiri.com.ar/go/chasquid/internal/domaininfo" "blitiri.com.ar/go/chasquid/internal/envelope" + "blitiri.com.ar/go/chasquid/internal/expvarom" "blitiri.com.ar/go/chasquid/internal/maillog" "blitiri.com.ar/go/chasquid/internal/normalize" "blitiri.com.ar/go/chasquid/internal/queue" @@ -36,13 +36,20 @@ import ( // Exported variables. 
var ( - commandCount = expvar.NewMap("chasquid/smtpIn/commandCount") - responseCodeCount = expvar.NewMap("chasquid/smtpIn/responseCodeCount") - spfResultCount = expvar.NewMap("chasquid/smtpIn/spfResultCount") - loopsDetected = expvar.NewInt("chasquid/smtpIn/loopsDetected") - tlsCount = expvar.NewMap("chasquid/smtpIn/tlsCount") - slcResults = expvar.NewMap("chasquid/smtpIn/securityLevelChecks") - hookResults = expvar.NewMap("chasquid/smtpIn/hookResults") + commandCount = expvarom.NewMap("chasquid/smtpIn/commandCount", + "command", "count of SMTP commands received, by command") + responseCodeCount = expvarom.NewMap("chasquid/smtpIn/responseCodeCount", + "code", "response codes returned to SMTP commands") + spfResultCount = expvarom.NewMap("chasquid/smtpIn/spfResultCount", + "result", "SPF result count") + loopsDetected = expvarom.NewInt("chasquid/smtpIn/loopsDetected", + "count of loops detected") + tlsCount = expvarom.NewMap("chasquid/smtpIn/tlsCount", + "status", "count of TLS usage in incoming connections") + slcResults = expvarom.NewMap("chasquid/smtpIn/securityLevelChecks", + "result", "incoming security level check results") + hookResults = expvarom.NewMap("chasquid/smtpIn/hookResults", + "result", "count of hook invocations, by result") ) var ( diff --git a/internal/sts/sts.go b/internal/sts/sts.go index 94ac925..afcd0eb 100644 --- a/internal/sts/sts.go +++ b/internal/sts/sts.go @@ -12,7 +12,6 @@ import ( "context" "encoding/json" "errors" - "expvar" "fmt" "io" "io/ioutil" @@ -25,6 +24,7 @@ import ( "sync" "time" + "blitiri.com.ar/go/chasquid/internal/expvarom" "blitiri.com.ar/go/chasquid/internal/safeio" "blitiri.com.ar/go/chasquid/internal/trace" @@ -34,20 +34,31 @@ import ( // Exported variables. 
var ( - cacheFetches = expvar.NewInt("chasquid/sts/cache/fetches") - cacheHits = expvar.NewInt("chasquid/sts/cache/hits") - cacheExpired = expvar.NewInt("chasquid/sts/cache/expired") - - cacheIOErrors = expvar.NewInt("chasquid/sts/cache/ioErrors") - cacheFailedFetch = expvar.NewInt("chasquid/sts/cache/failedFetch") - cacheInvalid = expvar.NewInt("chasquid/sts/cache/invalid") - - cacheMarshalErrors = expvar.NewInt("chasquid/sts/cache/marshalErrors") - cacheUnmarshalErrors = expvar.NewInt("chasquid/sts/cache/unmarshalErrors") - - cacheRefreshCycles = expvar.NewInt("chasquid/sts/cache/refreshCycles") - cacheRefreshes = expvar.NewInt("chasquid/sts/cache/refreshes") - cacheRefreshErrors = expvar.NewInt("chasquid/sts/cache/refreshErrors") + cacheFetches = expvarom.NewInt("chasquid/sts/cache/fetches", + "count of total fetches in the STS cache") + cacheHits = expvarom.NewInt("chasquid/sts/cache/hits", + "count of hits in the STS cache") + cacheExpired = expvarom.NewInt("chasquid/sts/cache/expired", + "count of expired entries in the STS cache") + + cacheIOErrors = expvarom.NewInt("chasquid/sts/cache/ioErrors", + "count of I/O errors when maintaining STS cache") + cacheFailedFetch = expvarom.NewInt("chasquid/sts/cache/failedFetch", + "count of failed fetches in the STS cache") + cacheInvalid = expvarom.NewInt("chasquid/sts/cache/invalid", + "count of invalid policies in the STS cache") + + cacheMarshalErrors = expvarom.NewInt("chasquid/sts/cache/marshalErrors", + "count of marshalling errors when maintaining STS cache") + cacheUnmarshalErrors = expvarom.NewInt("chasquid/sts/cache/unmarshalErrors", + "count of unmarshalling errors in STS cache") + + cacheRefreshCycles = expvarom.NewInt("chasquid/sts/cache/refreshCycles", + "count of STS cache refresh cycles") + cacheRefreshes = expvarom.NewInt("chasquid/sts/cache/refreshes", + "count of STS cache refreshes") + cacheRefreshErrors = expvarom.NewInt("chasquid/sts/cache/refreshErrors", + "count of STS cache refresh errors") ) 
// Policy represents a parsed policy. diff --git a/monitoring.go b/monitoring.go index 1f6827f..cb70422 100644 --- a/monitoring.go +++ b/monitoring.go @@ -9,6 +9,7 @@ import ( "time" "blitiri.com.ar/go/chasquid/internal/config" + "blitiri.com.ar/go/chasquid/internal/expvarom" "blitiri.com.ar/go/log" "google.golang.org/protobuf/encoding/prototext" @@ -45,6 +46,7 @@ func launchMonitoringServer(conf *config.Config) { } }) + http.HandleFunc("/metrics", expvarom.MetricsHandler) http.HandleFunc("/debug/flags", debugFlagsHandler) http.HandleFunc("/debug/config", debugConfigHandler(conf)) @@ -90,8 +92,12 @@ os hostname <i>{{.Hostname}}</i><p> <ul> <li><a href="/debug/requests?exp=1">requests (short-lived)</a> <li><a href="/debug/events?exp=1">events (long-lived)</a> - <li><a href="/debug/vars">exported variables</a> - <small><a href="https://golang.org/pkg/expvar/">(ref)</a></small> + <li><a href="https://blitiri.com.ar/p/chasquid/monitoring/#variables"> + exported variables</a>: + <a href="/debug/vars">expvar</a> + <small><a href="https://golang.org/pkg/expvar/">(ref)</a></small>, + <a href="/metrics">openmetrics</a> + <small><a href="https://openmetrics.io/">(ref)</a></small> </ul> <li>execution <ul> diff --git a/test/cover.sh b/test/cover.sh index 822c98e..583ce5e 100755 --- a/test/cover.sh +++ b/test/cover.sh @@ -23,10 +23,17 @@ mkdir -p .coverage export COVER_DIR="$PWD/.coverage" # Normal go tests. -go test -tags coverage \ - -covermode=count \ - -coverprofile="$COVER_DIR/pkg-tests.out"\ - -coverpkg=./... ./... +# We have to run them one by one because the expvar registration causes +# the single-binary tests to fail: cross-package expvars confuse the expvarom +# tests, which don't expect any expvars to exist besides the ones registered +# in the tests themselves. +for pkg in $(go list ./... 
| grep -v chasquid/cmd/); do + OUT_FILE="$COVER_DIR/pkg-`echo $pkg | sed s+/+_+g`.out" + go test -tags coverage \ + -covermode=count \ + -coverprofile="$OUT_FILE" \ + -coverpkg=./... $pkg +done # Integration tests. # Will run in coverage mode due to $COVER_DIR being set. diff --git a/test/t-09-loop/run.sh b/test/t-09-loop/run.sh index c4f07d0..1469502 100755 --- a/test/t-09-loop/run.sh +++ b/test/t-09-loop/run.sh @@ -55,6 +55,13 @@ fetch http://localhost:1099/debug/config .data-A/dbg-config \ || fail "failed to fetch /debug/config" fetch http://localhost:1099/404 .data-A/dbg-404 \ && fail "fetch /404 worked, should have failed" +fetch http://localhost:1099/metrics .data-A/metrics \ + && linesgt10 .data-A/metrics \ + || fail "failed to fetch /metrics" + +# Quick sanity-check of the /metrics page, just in case. +grep -q '^chasquid_queue_itemsWritten [0-9]\+$' .data-A/metrics \ + || fail "A /metrics is missing the chasquid_queue_itemsWritten counter" # Wait until one of them has noticed and stopped the loop. while sleep 0.1; do