git » chasquid » commit 7fe42a3

monitoring: Add OpenMetrics exporter

author Alberto Bertogli
2020-08-19 19:42:28 UTC
committer Alberto Bertogli
2020-08-21 11:07:33 UTC
parent 7e412db19b4a93146368af3c5740d570207754a6

monitoring: Add OpenMetrics exporter

This patch makes chasquid's monitoring server expose an OpenMetrics
metrics endpoint.

It adds a new package "expvarom" which implements an HTTP handler that
exports expvar variables in the OpenMetrics text format.

Then, the handler is registered by the monitoring server at /metrics
(where most things expect it to be).

The existing exported variables are also extended with descriptions,
which is optional, but improves the readability of the metrics.

chasquid.go +3 -1
docs/monitoring.md +8 -115
internal/aliases/aliases.go +3 -2
internal/courier/smtp.go +10 -6
internal/expvarom/expvarom.go +224 -0
internal/expvarom/expvarom_test.go +127 -0
internal/queue/queue.go +9 -5
internal/smtpsrv/conn.go +15 -8
internal/sts/sts.go +26 -15
monitoring.go +8 -2
test/cover.sh +11 -4
test/t-09-loop/run.sh +7 -0

diff --git a/chasquid.go b/chasquid.go
index dbd16a6..a1d195d 100644
--- a/chasquid.go
+++ b/chasquid.go
@@ -22,6 +22,7 @@ import (
 	"blitiri.com.ar/go/chasquid/internal/config"
 	"blitiri.com.ar/go/chasquid/internal/courier"
 	"blitiri.com.ar/go/chasquid/internal/dovecot"
+	"blitiri.com.ar/go/chasquid/internal/expvarom"
 	"blitiri.com.ar/go/chasquid/internal/maillog"
 	"blitiri.com.ar/go/chasquid/internal/normalize"
 	"blitiri.com.ar/go/chasquid/internal/smtpsrv"
@@ -52,7 +53,8 @@ var (
 
 	sourceDate      time.Time
 	sourceDateVar   = expvar.NewString("chasquid/sourceDateStr")
-	sourceDateTsVar = expvar.NewInt("chasquid/sourceDateTimestamp")
+	sourceDateTsVar = expvarom.NewInt("chasquid/sourceDateTimestamp",
+		"timestamp when the binary was built, in seconds since epoch")
 )
 
 func main() {
diff --git a/docs/monitoring.md b/docs/monitoring.md
index 0582e7d..731c079 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -23,11 +23,16 @@ These include:
 ## Variables
 
 chasquid exports some variables for monitoring, via the standard
-[expvar](https://golang.org/pkg/expvar/) package, which can be useful for
+[expvar](https://golang.org/pkg/expvar/) package and the
+[OpenMetrics](https://openmetrics.io/) text format, which can be useful for
 whitebox monitoring.
 
-They're accessible over the monitoring http server, at `/debug/vars` (default
-endpoint for expvars).
+They're accessible on the monitoring HTTP server, at `/debug/vars` (default
+endpoint for expvars) and `/metrics` (common endpoint for OpenMetrics).
+
+<a name="prometheus"></a>
+The `/metrics` endpoint is also compatible with
+[Prometheus](https://prometheus.io/).
 
 *Note these are still subject to change, although breaking changes will be
 avoided whenever possible, and will be noted in the [release
@@ -91,115 +96,3 @@ List of exported variables:
 - **chasquid/sts/cache/unmarshalErrors** (counter): count of unmarshaling
   errors as part of keeping the STS cache.
 - **chasquid/version** (string): version string.
-
-
-## Prometheus
-
-To monitor chasquid using [Prometheus](https://prometheus.io), you can use the
-[prometheus-expvar-exporter](https://blitiri.com.ar/git/r/prometheus-expvar-exporter/b/master/t/f=README.md.html)
-with the following configuration:
-
-```toml
-# Address to listen on. Prometheus should be told to scrape this.
-listen_addr = ":8000"
-
-[chasquid]
-# Replace with the address of chasquid's monitoring server.
-url = "http://localhost:1099/debug/vars"
-
-# Metrics are auto-imported, but some can't be; in particular the ones with
-# labels need explicit definitions here.
-
-m.aliases_hook_results.expvar ="chasquid/aliases/hookResults"
-m.aliases_hook_results.help ="aliases hook results"
-m.aliases_hook_results.label_name ="result"
-
-m.deliver_attempts.expvar = "chasquid/queue/deliverAttempts"
-m.deliver_attempts.help = "attempts to deliver mail"
-m.deliver_attempts.label_name = "recipient_type"
-
-m.dsn_queued.expvar = "chasquid/queue/dsnQueued"
-m.dsn_queued.help = "DSN queued"
-
-m.items_written.expvar = "chasquid/queue/itemsWritten"
-m.items_written.help = "items written"
-
-m.queue_puts.expvar = "chasquid/queue/putCount"
-m.queue_puts.help = "chasquid/queue/putCount"
-
-m.smtpin_commands.expvar = "chasquid/smtpIn/commandCount"
-m.smtpin_commands.help = "incoming SMTP command count"
-m.smtpin_commands.label_name = "command"
-
-m.smtp_hook_results.expvar = "chasquid/smtpIn/hookResults"
-m.smtp_hook_results.help = "hook invocation results"
-m.smtp_hook_results.label_name = "result"
-
-m.loops_detected.expvar = "chasquid/smtpIn/loopsDetected"
-m.loops_detected.help = "loops detected"
-
-m.smtp_response_codes.expvar = "chasquid/smtpIn/responseCodeCount"
-m.smtp_response_codes.help = "response codes returned to SMTP commands"
-m.smtp_response_codes.label_name = "code"
-
-m.in_sec_level_checks.expvar = "chasquid/smtpIn/securityLevelChecks"
-m.in_sec_level_checks.help = "incoming security level check results"
-m.in_sec_level_checks.label_name = "result"
-
-m.spf_results.expvar = "chasquid/smtpIn/spfResultCount"
-m.spf_results.help = "SPF result count"
-m.spf_results.label_name = "result"
-
-m.in_tls_usage.expvar = "chasquid/smtpIn/tlsCount"
-m.in_tls_usage.help = "count of TLS usage in incoming connections"
-m.in_tls_usage.label_name = "status"
-
-m.out_sec_level_checks.expvar = "chasquid/smtpOut/securityLevelChecks"
-m.out_sec_level_checks.help = "outgoing security level check results"
-m.out_sec_level_checks.label_name = "result"
-
-m.sts_modes.expvar = "chasquid/smtpOut/sts/mode"
-m.sts_modes.help = "STS checks on outgoing connections, by mode"
-m.sts_modes.label_name = "mode"
-
-m.sts_security.expvar = "chasquid/smtpOut/sts/security"
-m.sts_security.help = "STS security checks on outgoing connections, by result"
-m.sts_security.label_name = "result"
-
-m.out_tls_usage.expvar = "chasquid/smtpOut/tlsCount"
-m.out_tls_usage.help = "count of TLS usage in outgoing connections"
-m.out_tls_usage.label_name = "status"
-
-m.sts_cache_expired.expvar = "chasquid/sts/cache/expired"
-m.sts_cache_expired.help = "expired entries in the STS cache"
-
-m.sts_cache_failed_fetch.expvar = "chasquid/sts/cache/failedFetch"
-m.sts_cache_failed_fetch.help = "failed fetches in the STS cache"
-
-m.sts_cache_fetches.expvar = "chasquid/sts/cache/fetches"
-m.sts_cache_fetches.help = "total fetches in the STS cache"
-
-m.sts_cache_hits.expvar = "chasquid/sts/cache/hits"
-m.sts_cache_hits.help = "hits in the STS cache"
-
-m.sts_cache_invalid.expvar = "chasquid/sts/cache/invalid"
-m.sts_cache_invalid.help = "invalid policies in the STS cache"
-
-m.sts_cache_io_errors.expvar = "chasquid/sts/cache/ioErrors"
-m.sts_cache_io_errors.help = "I/O errors when maintaining STS cache"
-
-m.sts_cache_marshal_errors.expvar = "chasquid/sts/cache/marshalErrors"
-m.sts_cache_marshal_errors.help = "marshalling errors when maintaining STS cache"
-
-m.sts_cache_refresh_cycles.expvar = "chasquid/sts/cache/refreshCycles"
-m.sts_cache_refresh_cycles.help = "STS cache refresh cycles"
-
-m.sts_cache_refresh_errors.expvar = "chasquid/sts/cache/refreshErrors"
-m.sts_cache_refresh_errors.help = "STS cache refresh errors"
-
-m.sts_cache_refreshes.expvar = "chasquid/sts/cache/refreshes"
-m.sts_cache_refreshes.help = "count of STS cache refreshes"
-
-m.sts_cache_unmarshal_errors.expvar = "chasquid/sts/cache/unmarshalErrors"
-m.sts_cache_unmarshal_errors.help = "unmarshalling errors in STS cache"
-```
diff --git a/internal/aliases/aliases.go b/internal/aliases/aliases.go
index fc29951..897659f 100644
--- a/internal/aliases/aliases.go
+++ b/internal/aliases/aliases.go
@@ -56,7 +56,6 @@ package aliases
 import (
 	"bufio"
 	"context"
-	"expvar"
 	"fmt"
 	"io"
 	"os"
@@ -66,13 +65,15 @@ import (
 	"time"
 
 	"blitiri.com.ar/go/chasquid/internal/envelope"
+	"blitiri.com.ar/go/chasquid/internal/expvarom"
 	"blitiri.com.ar/go/chasquid/internal/normalize"
 	"blitiri.com.ar/go/chasquid/internal/trace"
 )
 
 // Exported variables.
 var (
-	hookResults = expvar.NewMap("chasquid/aliases/hookResults")
+	hookResults = expvarom.NewMap("chasquid/aliases/hookResults",
+		"result", "count of aliases hook results, by hook and result")
 )
 
 // Recipient represents a single recipient, after resolving aliases.
diff --git a/internal/courier/smtp.go b/internal/courier/smtp.go
index f09e100..6a93c15 100644
--- a/internal/courier/smtp.go
+++ b/internal/courier/smtp.go
@@ -3,7 +3,6 @@ package courier
 import (
 	"context"
 	"crypto/tls"
-	"expvar"
 	"flag"
 	"net"
 	"time"
@@ -12,6 +11,7 @@ import (
 
 	"blitiri.com.ar/go/chasquid/internal/domaininfo"
 	"blitiri.com.ar/go/chasquid/internal/envelope"
+	"blitiri.com.ar/go/chasquid/internal/expvarom"
 	"blitiri.com.ar/go/chasquid/internal/smtp"
 	"blitiri.com.ar/go/chasquid/internal/sts"
 	"blitiri.com.ar/go/chasquid/internal/trace"
@@ -35,11 +35,15 @@ var (
 
 // Exported variables.
 var (
-	tlsCount   = expvar.NewMap("chasquid/smtpOut/tlsCount")
-	slcResults = expvar.NewMap("chasquid/smtpOut/securityLevelChecks")
-
-	stsSecurityModes   = expvar.NewMap("chasquid/smtpOut/sts/mode")
-	stsSecurityResults = expvar.NewMap("chasquid/smtpOut/sts/security")
+	tlsCount = expvarom.NewMap("chasquid/smtpOut/tlsCount",
+		"result", "count of TLS status on outgoing connections")
+	slcResults = expvarom.NewMap("chasquid/smtpOut/securityLevelChecks",
+		"result", "count of security level checks on outgoing connections")
+
+	stsSecurityModes = expvarom.NewMap("chasquid/smtpOut/sts/mode",
+		"mode", "count of STS checks on outgoing connections")
+	stsSecurityResults = expvarom.NewMap("chasquid/smtpOut/sts/security",
+		"result", "count of STS security checks on outgoing connections")
 )
 
 // SMTP delivers remote mail via outgoing SMTP.
diff --git a/internal/expvarom/expvarom.go b/internal/expvarom/expvarom.go
new file mode 100644
index 0000000..1d02fd7
--- /dev/null
+++ b/internal/expvarom/expvarom.go
@@ -0,0 +1,224 @@
+// Package expvarom implements an OpenMetrics HTTP exporter for the variables
+// from the expvar package.
+//
+// This is useful for small servers that want to support both packages with
+// simple enough variables, without introducing any dependencies beyond the
+// standard library.
+//
+// Some functions to add descriptions and map labels are exported for
+// convenience, but their usage is optional.
+//
+// For more complex usage (like histograms, counters vs. gauges, etc.), use
+// the OpenMetrics libraries directly.
+//
+// The exporter uses the text-based format, as documented in:
+// https://prometheus.io/docs/instrumenting/exposition_formats/#text-based-format
+//
+// Note that the adoption of that format as the OpenMetrics one isn't
+// finalized yet, and it is possible that it will change in the future.
+//
+// Backwards compatibility is NOT guaranteed until the format is fully
+// standardized.
+package expvarom
+
+import (
+	"expvar"
+	"fmt"
+	"io"
+	"net/http"
+	"sort"
+	"strconv"
+	"strings"
+	"sync"
+	"unicode/utf8"
+)
+
+type exportedVar struct {
+	Name      string
+	Desc      string
+	LabelName string
+
+	I *expvar.Int
+	F *expvar.Float
+	M *expvar.Map
+}
+
+var (
+	infoMu        = sync.Mutex{}
+	descriptions  = map[string]string{}
+	mapLabelNames = map[string]string{}
+)
+
+// MetricsHandler implements an http.HandlerFunc which serves the registered
+// metrics, using the OpenMetrics text-based format.
+func MetricsHandler(w http.ResponseWriter, r *http.Request) {
+	w.Header().Set("Content-Type", "text/plain; version=0.0.4")
+
+	vars := []exportedVar{}
+	ignored := []string{}
+	expvar.Do(func(kv expvar.KeyValue) {
+		evar := exportedVar{
+			Name: metricNameToOM(kv.Key),
+		}
+		switch value := kv.Value.(type) {
+		case *expvar.Int:
+			evar.I = value
+		case *expvar.Float:
+			evar.F = value
+		case *expvar.Map:
+			evar.M = value
+		default:
+			// Unsupported type, ignore this variable.
+			ignored = append(ignored, evar.Name)
+			return
+		}
+
+		infoMu.Lock()
+		evar.Desc = descriptions[kv.Key]
+		evar.LabelName = mapLabelNames[kv.Key]
+		infoMu.Unlock()
+
+		// OM maps need a label name, while expvar ones do not. If we weren't
+		// told what to use, use a generic "key".
+		if evar.LabelName == "" {
+			evar.LabelName = "key"
+		}
+
+		vars = append(vars, evar)
+	})
+
+	// Sort the variables for reproducibility and readability.
+	sort.Slice(vars, func(i, j int) bool {
+		return vars[i].Name < vars[j].Name
+	})
+
+	for _, v := range vars {
+		writeVar(w, &v)
+	}
+
+	fmt.Fprintf(w, "# Generated by expvarom\n")
+	fmt.Fprintf(w, "# EXPERIMENTAL - Format is not fully standard yet\n")
+	fmt.Fprintf(w, "# Ignored variables: %q\n", ignored)
+}
+
+func writeVar(w io.Writer, v *exportedVar) {
+	if v.Desc != "" {
+		fmt.Fprintf(w, "# HELP %s %s\n", v.Name, v.Desc)
+	}
+
+	if v.I != nil {
+		fmt.Fprintf(w, "%s %d\n\n", v.Name, v.I.Value())
+		return
+	}
+
+	if v.F != nil {
+		fmt.Fprintf(w, "%s %g\n\n", v.Name, v.F.Value())
+		return
+	}
+
+	if v.M != nil {
+		count := 0
+		v.M.Do(func(kv expvar.KeyValue) {
+			vs := ""
+			switch value := kv.Value.(type) {
+			case *expvar.Int:
+				vs = strconv.FormatInt(value.Value(), 10)
+			case *expvar.Float:
+				vs = strconv.FormatFloat(value.Value(), 'g', -1, 64)
+			default:
+				// We only support Int and Float in maps.
+				return
+			}
+
+			labelValue := quoteLabelValue(kv.Key)
+
+			fmt.Fprintf(w, "%s{%s=%s} %s\n",
+				v.Name, v.LabelName, labelValue, vs)
+			count++
+		})
+		if count > 0 {
+			fmt.Fprintf(w, "\n")
+		}
+	}
+}
+
+// metricNameToOM converts an expvar metric name into an OpenMetrics-compliant
+// metric name. The latter is more restrictive, as it must match the regexp
+// "[a-zA-Z_:][a-zA-Z0-9_:]*", AND the ':' is not allowed for a direct
+// exporter.
+//
+// https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
+func metricNameToOM(name string) string {
+	n := ""
+	for _, c := range name {
+		if (c >= 'a' && c <= 'z') ||
+			(c >= 'A' && c <= 'Z') ||
+			(c >= '0' && c <= '9') ||
+			c == '_' {
+			n += string(c)
+		} else {
+			n += "_"
+		}
+	}
+
+	// If it begins with a number, prepend 'i' as a compromise.
+	if len(n) > 0 && n[0] >= '0' && n[0] <= '9' {
+		n = "i" + n
+	}
+
+	return n
+}
+
+// According to the spec, we only need to replace these 3 characters in label
+// values.
+var labelValueReplacer = strings.NewReplacer(
+	`\`, `\\`,
+	`"`, `\"`,
+	"\n", `\n`)
+
+// quoteLabelValue takes an arbitrary string, and quotes it so it can be
+// used as a label value. Output includes the wrapping `"`.
+func quoteLabelValue(v string) string {
+	// The spec requires label values to be valid UTF8, with `\`, `"` and "\n"
+	// escaped.  If it's invalid UTF8, hard-quote it first.  This will result
+	// in uglier looking values, but they will be well formed.
+	if !utf8.ValidString(v) {
+		v = strconv.QuoteToASCII(v)
+		v = v[1 : len(v)-1]
+	}
+
+	return `"` + labelValueReplacer.Replace(v) + `"`
+}
+
+// NewInt registers a new expvar.Int variable, with the given description.
+func NewInt(name, desc string) *expvar.Int {
+	infoMu.Lock()
+	descriptions[name] = desc
+	infoMu.Unlock()
+	return expvar.NewInt(name)
+}
+
+// NewFloat registers a new expvar.Float variable, with the given description.
+func NewFloat(name, desc string) *expvar.Float {
+	infoMu.Lock()
+	descriptions[name] = desc
+	infoMu.Unlock()
+	return expvar.NewFloat(name)
+}
+
+// NewMap registers a new expvar.Map variable, with the given label
+// name and description.
+func NewMap(name, labelName, desc string) *expvar.Map {
+	// Prevent accidents when using the description as the label name.
+	if strings.Contains(labelName, " ") {
+		panic(fmt.Sprintf(
+			"label name has spaces, mix up with the description? %q",
+			labelName))
+	}
+
+	infoMu.Lock()
+	descriptions[name] = desc
+	mapLabelNames[name] = labelName
+	infoMu.Unlock()
+	return expvar.NewMap(name)
+}
diff --git a/internal/expvarom/expvarom_test.go b/internal/expvarom/expvarom_test.go
new file mode 100644
index 0000000..4ce3c1e
--- /dev/null
+++ b/internal/expvarom/expvarom_test.go
@@ -0,0 +1,127 @@
+package expvarom
+
+import (
+	"expvar"
+	"io/ioutil"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+)
+
+var (
+	testI1 = NewInt("testI1", "int test var")
+	testI2 = expvar.NewInt("testI2")
+
+	testF = NewFloat("testF", "float test var")
+
+	testMI  = NewMap("testMI", "label", "int map test var")
+	testMF  = NewMap("testMF", "label", "float map test var")
+	testMXI = expvar.NewMap("testMXI")
+	testMXF = expvar.NewMap("testMXF")
+
+	testMEmpty = expvar.NewMap("testMEmpty") //nolint // Unused.
+
+	testMOther = expvar.NewMap("testMOther")
+
+	testS = expvar.NewString("testS")
+
+	// Naming test cases.
+	testN1 = expvar.NewInt("name/1z")
+	testN2 = NewInt("name$2", "name with $")
+	testN3 = expvar.NewInt("3name")
+	testN4 = expvar.NewInt("nAme_4Z")
+	testN5 = expvar.NewInt("ñame_5")
+)
+
+const expected string = `_ame_5 5
+
+i3name 3
+
+nAme_4Z 4
+
+name_1z 1
+
+# HELP name_2 name with $
+name_2 2
+
+# HELP testF float test var
+testF 3.43434
+
+# HELP testI1 int test var
+testI1 1
+
+testI2 2
+
+# HELP testMF float map test var
+testMF{label="key2.0"} 6.6
+testMF{label="key2.1"} 6.61
+testMF{label="key2.2-ñaca"} 6.62
+testMF{label="key2.3-a\\b"} 6.63
+testMF{label="key2.4-	"} 6.64
+testMF{label="key2.5-a\nb"} 6.65
+testMF{label="key2.6-a\"b"} 6.66
+testMF{label="key2.7-\\u00f1aca-A\\t\\xff\\xfe\\xfdB"} 6.67
+
+# HELP testMI int map test var
+testMI{label="key1"} 5
+
+testMXF{key="key4"} 8e-08
+
+testMXI{key="key3"} 7
+
+# Generated by expvarom
+# EXPERIMENTAL - Format is not fully standard yet
+# Ignored variables: ["cmdline" "memstats" "testS"]
+`
+
+func TestHandler(t *testing.T) {
+	testI1.Add(1)
+	testI2.Add(2)
+	testF.Add(3.43434)
+	testMI.Add("key1", 5)
+
+	// Test some strange keys in this map to check they're escaped properly.
+	testMF.AddFloat("key2.0", 6.60)
+	testMF.AddFloat("key2.1", 6.61)
+	testMF.AddFloat("key2.2-ñaca", 6.62)
+	testMF.AddFloat(`key2.3-a\b`, 6.63)
+	testMF.AddFloat("key2.4-\t", 6.64)
+	testMF.AddFloat("key2.5-a\nb", 6.65)
+	testMF.AddFloat(`key2.6-a"b`, 6.66)
+	testMF.AddFloat("key2.7-ñaca-A\t\xff\xfe\xfdB", 6.67) // Invalid utf8.
+
+	testMXI.Add("key3", 7)
+	testMXF.AddFloat("key4", 8e-8)
+	testS.Set("lalala")
+
+	testN1.Add(1)
+	testN2.Add(2)
+	testN3.Add(3)
+	testN4.Add(4)
+	testN5.Add(5)
+
+	// Map with an unsupported type.
+	testMOther.Set("keyX", &expvar.String{})
+
+	req := httptest.NewRequest("get", "/metrics", nil)
+	w := httptest.NewRecorder()
+	MetricsHandler(w, req)
+
+	resp := w.Result()
+	body, _ := ioutil.ReadAll(resp.Body)
+
+	if diff := cmp.Diff(expected, string(body)); diff != "" {
+		t.Errorf("MetricsHandler() mismatch (-want +got):\n%s", diff)
+	}
+}
+
+func TestMapLabelAccident(t *testing.T) {
+	defer func() {
+		if r := recover(); r == nil {
+			t.Errorf("NewMap did not panic as expected")
+		}
+	}()
+
+	NewMap("name", "label with spaces", "description")
+}
diff --git a/internal/queue/queue.go b/internal/queue/queue.go
index 46e5108..78700d9 100644
--- a/internal/queue/queue.go
+++ b/internal/queue/queue.go
@@ -8,7 +8,6 @@ package queue
 import (
 	"context"
 	"encoding/base64"
-	"expvar"
 	"fmt"
 	"math/rand"
 	"os"
@@ -23,6 +22,7 @@ import (
 	"blitiri.com.ar/go/chasquid/internal/aliases"
 	"blitiri.com.ar/go/chasquid/internal/courier"
 	"blitiri.com.ar/go/chasquid/internal/envelope"
+	"blitiri.com.ar/go/chasquid/internal/expvarom"
 	"blitiri.com.ar/go/chasquid/internal/maillog"
 	"blitiri.com.ar/go/chasquid/internal/protoio"
 	"blitiri.com.ar/go/chasquid/internal/set"
@@ -54,10 +54,14 @@ var (
 
 // Exported variables.
 var (
-	putCount        = expvar.NewInt("chasquid/queue/putCount")
-	itemsWritten    = expvar.NewInt("chasquid/queue/itemsWritten")
-	dsnQueued       = expvar.NewInt("chasquid/queue/dsnQueued")
-	deliverAttempts = expvar.NewMap("chasquid/queue/deliverAttempts")
+	putCount = expvarom.NewInt("chasquid/queue/putCount",
+		"count of envelopes attempted to be put in the queue")
+	itemsWritten = expvarom.NewInt("chasquid/queue/itemsWritten",
+		"count of items the queue wrote to disk")
+	dsnQueued = expvarom.NewInt("chasquid/queue/dsnQueued",
+		"count of DSNs that we generated (queued)")
+	deliverAttempts = expvarom.NewMap("chasquid/queue/deliverAttempts",
+		"recipient_type", "attempts to deliver mail, by recipient type")
 )
 
 // Channel used to get random IDs for items in the queue.
diff --git a/internal/smtpsrv/conn.go b/internal/smtpsrv/conn.go
index eef67d4..a35677f 100644
--- a/internal/smtpsrv/conn.go
+++ b/internal/smtpsrv/conn.go
@@ -5,7 +5,6 @@ import (
 	"bytes"
 	"context"
 	"crypto/tls"
-	"expvar"
 	"flag"
 	"fmt"
 	"io"
@@ -25,6 +24,7 @@ import (
 	"blitiri.com.ar/go/chasquid/internal/auth"
 	"blitiri.com.ar/go/chasquid/internal/domaininfo"
 	"blitiri.com.ar/go/chasquid/internal/envelope"
+	"blitiri.com.ar/go/chasquid/internal/expvarom"
 	"blitiri.com.ar/go/chasquid/internal/maillog"
 	"blitiri.com.ar/go/chasquid/internal/normalize"
 	"blitiri.com.ar/go/chasquid/internal/queue"
@@ -36,13 +36,20 @@ import (
 
 // Exported variables.
 var (
-	commandCount      = expvar.NewMap("chasquid/smtpIn/commandCount")
-	responseCodeCount = expvar.NewMap("chasquid/smtpIn/responseCodeCount")
-	spfResultCount    = expvar.NewMap("chasquid/smtpIn/spfResultCount")
-	loopsDetected     = expvar.NewInt("chasquid/smtpIn/loopsDetected")
-	tlsCount          = expvar.NewMap("chasquid/smtpIn/tlsCount")
-	slcResults        = expvar.NewMap("chasquid/smtpIn/securityLevelChecks")
-	hookResults       = expvar.NewMap("chasquid/smtpIn/hookResults")
+	commandCount = expvarom.NewMap("chasquid/smtpIn/commandCount",
+		"command", "count of SMTP commands received, by command")
+	responseCodeCount = expvarom.NewMap("chasquid/smtpIn/responseCodeCount",
+		"code", "response codes returned to SMTP commands")
+	spfResultCount = expvarom.NewMap("chasquid/smtpIn/spfResultCount",
+		"result", "SPF result count")
+	loopsDetected = expvarom.NewInt("chasquid/smtpIn/loopsDetected",
+		"count of loops detected")
+	tlsCount = expvarom.NewMap("chasquid/smtpIn/tlsCount",
+		"status", "count of TLS usage in incoming connections")
+	slcResults = expvarom.NewMap("chasquid/smtpIn/securityLevelChecks",
+		"result", "incoming security level check results")
+	hookResults = expvarom.NewMap("chasquid/smtpIn/hookResults",
+		"result", "count of hook invocations, by result")
 )
 
 var (
diff --git a/internal/sts/sts.go b/internal/sts/sts.go
index 94ac925..afcd0eb 100644
--- a/internal/sts/sts.go
+++ b/internal/sts/sts.go
@@ -12,7 +12,6 @@ import (
 	"context"
 	"encoding/json"
 	"errors"
-	"expvar"
 	"fmt"
 	"io"
 	"io/ioutil"
@@ -25,6 +24,7 @@ import (
 	"sync"
 	"time"
 
+	"blitiri.com.ar/go/chasquid/internal/expvarom"
 	"blitiri.com.ar/go/chasquid/internal/safeio"
 	"blitiri.com.ar/go/chasquid/internal/trace"
 
@@ -34,20 +34,31 @@ import (
 
 // Exported variables.
 var (
-	cacheFetches = expvar.NewInt("chasquid/sts/cache/fetches")
-	cacheHits    = expvar.NewInt("chasquid/sts/cache/hits")
-	cacheExpired = expvar.NewInt("chasquid/sts/cache/expired")
-
-	cacheIOErrors    = expvar.NewInt("chasquid/sts/cache/ioErrors")
-	cacheFailedFetch = expvar.NewInt("chasquid/sts/cache/failedFetch")
-	cacheInvalid     = expvar.NewInt("chasquid/sts/cache/invalid")
-
-	cacheMarshalErrors   = expvar.NewInt("chasquid/sts/cache/marshalErrors")
-	cacheUnmarshalErrors = expvar.NewInt("chasquid/sts/cache/unmarshalErrors")
-
-	cacheRefreshCycles = expvar.NewInt("chasquid/sts/cache/refreshCycles")
-	cacheRefreshes     = expvar.NewInt("chasquid/sts/cache/refreshes")
-	cacheRefreshErrors = expvar.NewInt("chasquid/sts/cache/refreshErrors")
+	cacheFetches = expvarom.NewInt("chasquid/sts/cache/fetches",
+		"count of total fetches in the STS cache")
+	cacheHits = expvarom.NewInt("chasquid/sts/cache/hits",
+		"count of hits in the STS cache")
+	cacheExpired = expvarom.NewInt("chasquid/sts/cache/expired",
+		"count of expired entries in the STS cache")
+
+	cacheIOErrors = expvarom.NewInt("chasquid/sts/cache/ioErrors",
+		"count of I/O errors when maintaining STS cache")
+	cacheFailedFetch = expvarom.NewInt("chasquid/sts/cache/failedFetch",
+		"count of failed fetches in the STS cache")
+	cacheInvalid = expvarom.NewInt("chasquid/sts/cache/invalid",
+		"count of invalid policies in the STS cache")
+
+	cacheMarshalErrors = expvarom.NewInt("chasquid/sts/cache/marshalErrors",
+		"count of marshalling errors when maintaining STS cache")
+	cacheUnmarshalErrors = expvarom.NewInt("chasquid/sts/cache/unmarshalErrors",
+		"count of unmarshalling errors in STS cache")
+
+	cacheRefreshCycles = expvarom.NewInt("chasquid/sts/cache/refreshCycles",
+		"count of STS cache refresh cycles")
+	cacheRefreshes = expvarom.NewInt("chasquid/sts/cache/refreshes",
+		"count of STS cache refreshes")
+	cacheRefreshErrors = expvarom.NewInt("chasquid/sts/cache/refreshErrors",
+		"count of STS cache refresh errors")
 )
 
 // Policy represents a parsed policy.
diff --git a/monitoring.go b/monitoring.go
index 1f6827f..cb70422 100644
--- a/monitoring.go
+++ b/monitoring.go
@@ -9,6 +9,7 @@ import (
 	"time"
 
 	"blitiri.com.ar/go/chasquid/internal/config"
+	"blitiri.com.ar/go/chasquid/internal/expvarom"
 	"blitiri.com.ar/go/log"
 	"google.golang.org/protobuf/encoding/prototext"
 
@@ -45,6 +46,7 @@ func launchMonitoringServer(conf *config.Config) {
 		}
 	})
 
+	http.HandleFunc("/metrics", expvarom.MetricsHandler)
 	http.HandleFunc("/debug/flags", debugFlagsHandler)
 	http.HandleFunc("/debug/config", debugConfigHandler(conf))
 
@@ -90,8 +92,12 @@ os hostname <i>{{.Hostname}}</i><p>
     <ul>
       <li><a href="/debug/requests?exp=1">requests (short-lived)</a>
       <li><a href="/debug/events?exp=1">events (long-lived)</a>
-      <li><a href="/debug/vars">exported variables</a>
-        <small><a href="https://golang.org/pkg/expvar/">(ref)</a></small>
+	  <li><a href="https://blitiri.com.ar/p/chasquid/monitoring/#variables">
+	        exported variables</a>:
+          <a href="/debug/vars">expvar</a>
+          <small><a href="https://golang.org/pkg/expvar/">(ref)</a></small>,
+		  <a href="/metrics">openmetrics</a>
+		  <small><a href="https://openmetrics.io/">(ref)</a></small>
     </ul>
   <li>execution
     <ul>
diff --git a/test/cover.sh b/test/cover.sh
index 822c98e..583ce5e 100755
--- a/test/cover.sh
+++ b/test/cover.sh
@@ -23,10 +23,17 @@ mkdir -p .coverage
 export COVER_DIR="$PWD/.coverage"
 
 # Normal go tests.
-go test -tags coverage \
-	-covermode=count \
-	-coverprofile="$COVER_DIR/pkg-tests.out"\
-	-coverpkg=./... ./...
+# We have to run them one by one because the expvar registration causes
+# the single-binary tests to fail: cross-package expvars confuse the expvarom
+# tests, which don't expect any expvars to exist besides the ones registered
+# in the tests themselves.
+for pkg in $(go list ./... | grep -v chasquid/cmd/); do
+	OUT_FILE="$COVER_DIR/pkg-`echo $pkg | sed s+/+_+g`.out"
+	go test -tags coverage \
+		-covermode=count \
+		-coverprofile="$OUT_FILE" \
+		-coverpkg=./... $pkg
+done
 
 # Integration tests.
 # Will run in coverage mode due to $COVER_DIR being set.
diff --git a/test/t-09-loop/run.sh b/test/t-09-loop/run.sh
index c4f07d0..1469502 100755
--- a/test/t-09-loop/run.sh
+++ b/test/t-09-loop/run.sh
@@ -55,6 +55,13 @@ fetch http://localhost:1099/debug/config .data-A/dbg-config \
 	|| fail "failed to fetch /debug/config"
 fetch http://localhost:1099/404 .data-A/dbg-404 \
 	&& fail "fetch /404 worked, should have failed"
+fetch http://localhost:1099/metrics .data-A/metrics \
+	&& linesgt10 .data-A/metrics \
+	|| fail "failed to fetch /metrics"
+
+# Quick sanity-check of the /metrics page, just in case.
+grep -q '^chasquid_queue_itemsWritten [0-9]\+$' .data-A/metrics \
+	|| fail "A /metrics is missing the chasquid_queue_itemsWritten counter"
 
 # Wait until one of them has noticed and stopped the loop.
 while sleep 0.1; do