author | Alberto Bertogli
<albertito@blitiri.com.ar> 2023-08-04 00:12:28 UTC |
committer | Alberto Bertogli
<albertito@blitiri.com.ar> 2023-08-04 00:17:24 UTC |
parent | 24c88bb9ebd66ff133592e0374dafb3e5dc72975 |
dnss.go | +5 | -73 |
dnss_test.go | +2 | -3 |
internal/dnsserver/resolver.go | +2 | -2 |
internal/dnsserver/server.go | +3 | -2 |
internal/httpresolver/resolver.go | +3 | -3 |
internal/nettrace/bench_test.go | +57 | -0 |
internal/nettrace/context.go | +40 | -0 |
internal/nettrace/context_test.go | +66 | -0 |
internal/nettrace/evtring.go | +42 | -0 |
internal/nettrace/histogram.go | +71 | -0 |
internal/nettrace/histogram_test.go | +29 | -0 |
internal/nettrace/http.go | +259 | -0 |
internal/nettrace/http_test.go | +255 | -0 |
internal/nettrace/templates/_latency.html.tmpl | +25 | -0 |
internal/nettrace/templates/_recursive.html.tmpl | +51 | -0 |
internal/nettrace/templates/_single.html.tmpl | +35 | -0 |
internal/nettrace/templates/index.html.tmpl | +129 | -0 |
internal/nettrace/templates/style.css | +187 | -0 |
internal/nettrace/test_server.go | +243 | -0 |
internal/nettrace/trace.go | +558 | -0 |
internal/nettrace/trace_test.go | +215 | -0 |
internal/trace/trace.go | +16 | -25 |
monitoring.go | +191 | -0 |
tests/external.sh | +4 | -4 |
diff --git a/dnss.go b/dnss.go index 1ba2d5b..44152f8 100644 --- a/dnss.go +++ b/dnss.go @@ -13,8 +13,6 @@ package main import ( "flag" - "fmt" - "net/http" "net/url" "os" "os/signal" @@ -82,10 +80,14 @@ func main() { flag.Parse() log.Init() + log.Infof("dnss starting (%s, %s)", + Version, + SourceDate.Format("2006-01-02 15:04:05 -0700")) + go signalHandler() if *monitoringListenAddr != "" { - launchMonitoringServer(*monitoringListenAddr) + go monitoringServer(*monitoringListenAddr) } if !(*enableDNStoHTTPS || *enableHTTPStoDNS) { @@ -159,73 +161,3 @@ func signalHandler() { } } } - -func launchMonitoringServer(addr string) { - log.Infof("Monitoring HTTP server listening on %s", addr) - - http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { - if r.URL.Path != "/" { - http.NotFound(w, r) - return - } - w.Write([]byte(monitoringHTMLIndex)) - }) - - flags := dumpFlags() - http.HandleFunc("/debug/flags", func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte(flags)) - }) - - go http.ListenAndServe(addr, nil) -} - -// Static index for the monitoring website. -const monitoringHTMLIndex = `<!DOCTYPE html> -<html> - <head> - <title>dnss monitoring</title> - </head> - <body> - <h1>dnss monitoring</h1> - <ul> - <li><a href="/debug/requests">requests</a> - <small><a href="https://godoc.org/golang.org/x/net/trace"> - (ref)</a></small> - <ul> - <li><a href="/debug/requests?fam=dnsserver&b=11">dns server latency</a> - <li><a href="/debug/requests?fam=dnsserver&b=0&exp=1">dns server trace</a> - </ul> - <li><a href="/debug/dnsserver/cache/dump">cache dump</a> - <li><a href="/debug/pprof">pprof</a> - <small><a href="https://golang.org/pkg/net/http/pprof/"> - (ref)</a></small> - <ul> - <li><a href="/debug/pprof/goroutine?debug=1">goroutines</a> - </ul> - <li><a href="/debug/flags">flags</a> - <li><a href="/debug/vars">public variables</a> - </ul> - </body> -</html> -` - -// dumpFlags to a string, for troubleshooting purposes. -func dumpFlags() string { - s := "" - visited := make(map[string]bool) - - // Print set flags first, then the rest. - flag.Visit(func(f *flag.Flag) { - s += fmt.Sprintf("-%s=%s\n", f.Name, f.Value.String()) - visited[f.Name] = true - }) - - s += "\n" - flag.VisitAll(func(f *flag.Flag) { - if !visited[f.Name] { - s += fmt.Sprintf("-%s=%s\n", f.Name, f.Value.String()) - } - }) - - return s -} diff --git a/dnss_test.go b/dnss_test.go index 18c5771..ae7f6f2 100644 --- a/dnss_test.go +++ b/dnss_test.go @@ -191,13 +191,12 @@ func BenchmarkSimple(b *testing.B) { func TestMonitoringServer(t *testing.T) { addr := testutil.GetFreePort() - launchMonitoringServer(addr) + go monitoringServer(addr) testutil.WaitForHTTPServer(addr) checkGet(t, "http://"+addr+"/") - checkGet(t, "http://"+addr+"/debug/requests") + checkGet(t, "http://"+addr+"/debug/traces") checkGet(t, "http://"+addr+"/debug/pprof/goroutine") - checkGet(t, "http://"+addr+"/debug/flags") checkGet(t, "http://"+addr+"/debug/vars") // Check that we emit 404 for non-existing paths. diff --git a/internal/dnsserver/resolver.go b/internal/dnsserver/resolver.go index 86db83d..db3529e 100644 --- a/internal/dnsserver/resolver.go +++ b/internal/dnsserver/resolver.go @@ -121,7 +121,7 @@ func (c *cachingResolver) DumpCache(w http.ResponseWriter, r *http.Request) { for q, ans := range c.answer { // Only include names and records if we are running verbosily. name := "<hidden>" - if log.V(3) { + if log.V(1) { name = q.Name } @@ -131,7 +131,7 @@ func (c *cachingResolver) DumpCache(w http.ResponseWriter, r *http.Request) { ttl := getTTL(ans) fmt.Fprintf(buf, " expires in %s (%s)\n", ttl, time.Now().Add(ttl)) - if log.V(3) { + if log.V(1) { for _, rr := range ans { fmt.Fprintf(buf, " %s\n", rr.String()) } diff --git a/internal/dnsserver/server.go b/internal/dnsserver/server.go index f78556d..7904582 100644 --- a/internal/dnsserver/server.go +++ b/internal/dnsserver/server.go @@ -63,10 +63,11 @@ func New(addr string, resolver Resolver, unqUpstream string, serverOverrides Dom // Handler for the incoming DNS queries. func (s *Server) Handler(w dns.ResponseWriter, r *dns.Msg) { - tr := trace.New("dnsserver", "Handler") + tr := trace.New("dnsserver.Handler", + w.RemoteAddr().Network()+" "+w.RemoteAddr().String()) defer tr.Finish() - tr.Printf("from:%v id:%v", w.RemoteAddr(), r.Id) + tr.Printf("id:%v", r.Id) tr.Question(r.Question) // We only support single-question queries. diff --git a/internal/httpresolver/resolver.go b/internal/httpresolver/resolver.go index 9df8b08..6d5e0a3 100644 --- a/internal/httpresolver/resolver.go +++ b/internal/httpresolver/resolver.go @@ -99,7 +99,7 @@ func (r *httpsResolver) Init() error { r.client = client r.mu.Unlock() - tr := trace.New("httpresolver", r.Upstream.String()) + tr := trace.New("httpresolver.Client", r.Upstream.String()) tr.Printf("Init complete, client: %p", r.client) tr.Finish() @@ -177,7 +177,7 @@ func (r *httpsResolver) maybeRotateClient() { // The time chosen here combines with the transport timeouts set above, so // we never have too many in-flight connections. if time.Since(r.firstErr) > 10*time.Second { - tr := trace.New("httpresolver", r.Upstream.String()) + tr := trace.New("httpresolver.Client", r.Upstream.String()) defer tr.Finish() tr.Printf("Rotating client after %s of errors: %p", @@ -200,7 +200,7 @@ func (r *httpsResolver) Query(req *dns.Msg, tr *trace.Trace) (*dns.Msg, error) { return nil, fmt.Errorf("cannot pack query: %v", err) } - if log.V(3) { + if log.V(1) { tr.Printf("DoH POST %v", r.Upstream) } diff --git a/internal/nettrace/bench_test.go b/internal/nettrace/bench_test.go new file mode 100644 index 0000000..a58fdbb --- /dev/null +++ b/internal/nettrace/bench_test.go @@ -0,0 +1,57 @@ +package nettrace + +import ( + "testing" +) + +// Our benchmark loop is similar to the one from golang.org/x/net/trace, so we +// can compare results. +func runBench(b *testing.B, events int) { + nTraces := (b.N + events + 1) / events + + for i := 0; i < nTraces; i++ { + tr := New("bench", "test") + for j := 0; j < events; j++ { + tr.Printf("%d", j) + } + tr.Finish() + } +} + +func BenchmarkTrace_2(b *testing.B) { + runBench(b, 2) +} + +func BenchmarkTrace_10(b *testing.B) { + runBench(b, 10) +} + +func BenchmarkTrace_100(b *testing.B) { + runBench(b, 100) +} + +func BenchmarkTrace_1000(b *testing.B) { + runBench(b, 1000) +} + +func BenchmarkTrace_10000(b *testing.B) { + runBench(b, 10000) +} + +func BenchmarkNewAndFinish(b *testing.B) { + for i := 0; i < b.N; i++ { + tr := New("bench", "test") + tr.Finish() + } +} + +func BenchmarkPrintf(b *testing.B) { + tr := New("bench", "test") + defer tr.Finish() + b.ResetTimer() + for i := 0; i < b.N; i++ { + // Keep this without any formatting, so we measure our code instead of + // the performance of fmt.Sprintf. + tr.Printf("this is printf") + } +} diff --git a/internal/nettrace/context.go b/internal/nettrace/context.go new file mode 100644 index 0000000..572e4e4 --- /dev/null +++ b/internal/nettrace/context.go @@ -0,0 +1,40 @@ +package nettrace + +import "context" + +type ctxKeyT string + +const ctxKey ctxKeyT = "blitiri.com.ar/go/nettrace" + +// NewContext returns a new context with the given trace attached. +func NewContext(ctx context.Context, tr Trace) context.Context { + return context.WithValue(ctx, ctxKey, tr) +} + +// FromContext returns the trace attached to the given context (if any). +func FromContext(ctx context.Context) (Trace, bool) { + tr, ok := ctx.Value(ctxKey).(Trace) + return tr, ok +} + +// FromContextOrNew returns the trace attached to the given context, or a new +// trace if there is none. +func FromContextOrNew(ctx context.Context, family, title string) (Trace, context.Context) { + tr, ok := FromContext(ctx) + if ok { + return tr, ctx + } + + tr = New(family, title) + return tr, NewContext(ctx, tr) +} + +// ChildFromContext returns a new trace that is a child of the one attached to +// the context (if any). +func ChildFromContext(ctx context.Context, family, title string) Trace { + parent, ok := FromContext(ctx) + if ok { + return parent.NewChild(family, title) + } + return New(family, title) +} diff --git a/internal/nettrace/context_test.go b/internal/nettrace/context_test.go new file mode 100644 index 0000000..11eb401 --- /dev/null +++ b/internal/nettrace/context_test.go @@ -0,0 +1,66 @@ +package nettrace + +import ( + "context" + "testing" +) + +func TestContext(t *testing.T) { + tr := New("TestContext", "trace") + defer tr.Finish() + + // Attach the trace to a new context. + ctx := NewContext(context.Background(), tr) + + // Get the trace back from the context. + { + tr2, ok := FromContext(ctx) + if !ok { + t.Errorf("Context with trace returned not found") + } + if tr != tr2 { + t.Errorf("Trace from context is different: %v != %v", tr, tr2) + } + } + + // Create a child trace from the context. + { + tr3 := ChildFromContext(ctx, "TestContext", "child") + if p := tr3.(*trace).Parent; p != tr { + t.Errorf("Child doesn't have the right parent: %v != %v", p, tr) + } + tr3.Finish() + } + + // FromContextOrNew returns the one from the context. + { + tr4, ctx4 := FromContextOrNew(ctx, "TestContext", "from-ctx") + if ctx4 != ctx { + t.Errorf("Got new context: %v != %v", ctx4, ctx) + } + if tr4 != tr { + t.Errorf("Context with trace returned new trace: %v != %v", tr4, tr) + } + } + + // FromContextOrNew needs to create a new one. + { + tr5, ctx5 := FromContextOrNew( + context.Background(), "TestContext", "tr5") + if tr, _ := FromContext(ctx5); tr != tr5 { + t.Errorf("Context with trace returned the wrong trace: %v != %v", + tr, tr5) + } + tr5.Finish() + } + + // Child from a context that has no trace attached. + { + tr6 := ChildFromContext( + context.Background(), "TestContext", "child") + tr6.Finish() + if p := tr6.(*trace).Parent; p != nil { + t.Errorf("Expected orphan trace, it has a parent: %v", p) + } + } +} diff --git a/internal/nettrace/evtring.go b/internal/nettrace/evtring.go new file mode 100644 index 0000000..026e8e3 --- /dev/null +++ b/internal/nettrace/evtring.go @@ -0,0 +1,42 @@ +package nettrace + +import "time" + +type evtRing struct { + evts []event + max int + pos int // Points to the latest element. + firstDrop time.Time +} + +func newEvtRing(n int) *evtRing { + return &evtRing{ + max: n, + pos: -1, + } +} + +func (r *evtRing) Add(e *event) { + if len(r.evts) < r.max { + r.evts = append(r.evts, *e) + r.pos++ + return + } + + r.pos = (r.pos + 1) % r.max + + // Record the first drop as the time of the first dropped message. + if r.firstDrop.IsZero() { + r.firstDrop = r.evts[r.pos].When + } + + r.evts[r.pos] = *e +} + +func (r *evtRing) Do(f func(e *event)) { + for i := 0; i < len(r.evts); i++ { + // Go from older to newer by starting at (r.pos+1). + pos := (r.pos + 1 + i) % len(r.evts) + f(&r.evts[pos]) + } +} diff --git a/internal/nettrace/histogram.go b/internal/nettrace/histogram.go new file mode 100644 index 0000000..662a887 --- /dev/null +++ b/internal/nettrace/histogram.go @@ -0,0 +1,71 @@ +package nettrace + +import ( + "time" +) + +type histogram struct { + count [nBuckets]uint64 + + totalQ uint64 + totalT time.Duration + min time.Duration + max time.Duration +} + +func (h *histogram) Add(bucket int, latency time.Duration) { + if h.totalQ == 0 || h.min > latency { + h.min = latency + } + if h.max < latency { + h.max = latency + } + + h.count[bucket]++ + h.totalQ++ + h.totalT += latency +} + +type histSnapshot struct { + Counts map[time.Duration]line + Count uint64 + Avg, Min, Max time.Duration +} + +type line struct { + Start time.Duration + BucketIdx int + Count uint64 + Percent float32 + CumPct float32 +} + +func (h *histogram) Snapshot() *histSnapshot { + s := &histSnapshot{ + Counts: map[time.Duration]line{}, + Count: h.totalQ, + Min: h.min, + Max: h.max, + } + + if h.totalQ > 0 { + s.Avg = time.Duration(uint64(h.totalT) / h.totalQ) + } + + var cumCount uint64 + for i := 0; i < nBuckets; i++ { + cumCount += h.count[i] + l := line{ + Start: buckets[i], + BucketIdx: i, + Count: h.count[i], + } + if h.totalQ > 0 { + l.Percent = float32(h.count[i]) / float32(h.totalQ) * 100 + l.CumPct = float32(cumCount) / float32(h.totalQ) * 100 + } + s.Counts[buckets[i]] = l + } + + return s +} diff --git a/internal/nettrace/histogram_test.go b/internal/nettrace/histogram_test.go new file mode 100644 index 0000000..c6c5ab4 --- /dev/null +++ b/internal/nettrace/histogram_test.go @@ -0,0 +1,29 @@ +package nettrace + +import ( + "testing" + "time" +) + +func TestHistogramBasic(t *testing.T) { + h := histogram{} + h.Add(1, 1*time.Millisecond) + + snap := h.Snapshot() + if snap.Count != 1 || + snap.Min != 1*time.Millisecond || + snap.Max != 1*time.Millisecond || + snap.Avg != 1*time.Millisecond { + t.Errorf("expected snapshot with only 1 sample, got %v", snap) + } +} + +func TestHistogramEmpty(t *testing.T) { + h := histogram{} + snap := h.Snapshot() + + if len(snap.Counts) != nBuckets || snap.Count != 0 || + snap.Avg != 0 || snap.Min != 0 || snap.Max != 0 { + t.Errorf("expected zero snapshot, got %v", snap) + } +} diff --git a/internal/nettrace/http.go b/internal/nettrace/http.go new file mode 100644 index 0000000..2f51acc --- /dev/null +++ b/internal/nettrace/http.go @@ -0,0 +1,259 @@ +package nettrace + +import ( + "bytes" + "embed" + "fmt" + "hash/crc32" + "html/template" + "math" + "net/http" + "sort" + "strconv" + "time" +) + +//go:embed "templates/*.tmpl" "templates/*.css" +var templatesFS embed.FS +var top *template.Template + +func init() { + top = template.Must( + template.New("_top").Funcs(template.FuncMap{ + "stripZeros": stripZeros, + "roundSeconds": roundSeconds, + "roundDuration": roundDuration, + "colorize": colorize, + "depthspan": depthspan, + "shorttitle": shorttitle, + "traceemoji": traceemoji, + }).ParseFS(templatesFS, "templates/*")) +} + +// RegisterHandler registers a the trace handler in the given ServeMux, on +// `/debug/traces`. +func RegisterHandler(mux *http.ServeMux) { + mux.HandleFunc("/debug/traces", RenderTraces) +} + +// RenderTraces is an http.Handler that renders the tracing information. +func RenderTraces(w http.ResponseWriter, req *http.Request) { + data := &struct { + Buckets *[]time.Duration + FamTraces map[string]*familyTraces + + // When displaying traces for a specific family. + Family string + Bucket int + BucketStr string + AllGT bool + Traces []*trace + + // When displaying latencies for a specific family. + Latencies *histSnapshot + + // When displaying a specific trace. + Trace *trace + AllEvents []traceAndEvent + + // Error to show to the user. + Error string + }{} + + // Reference the common buckets, no need to copy them. + data.Buckets = &buckets + + // Copy the family traces map, so we don't have to keep it locked for too + // long. We'll still need to lock individual entries. + data.FamTraces = copyFamilies() + + // Default to showing greater-than. + data.AllGT = true + if all := req.FormValue("all"); all != "" { + data.AllGT, _ = strconv.ParseBool(all) + } + + // Fill in the family related parameters. + if fam := req.FormValue("fam"); fam != "" { + if _, ok := data.FamTraces[fam]; !ok { + data.Family = "" + data.Error = "Unknown family" + w.WriteHeader(http.StatusNotFound) + goto render + } + data.Family = fam + + if bs := req.FormValue("b"); bs != "" { + i, err := strconv.Atoi(bs) + if err != nil { + data.Error = "Invalid bucket (not a number)" + w.WriteHeader(http.StatusBadRequest) + goto render + } else if i < -2 || i >= nBuckets { + data.Error = "Invalid bucket number" + w.WriteHeader(http.StatusBadRequest) + goto render + } + data.Bucket = i + data.Traces = data.FamTraces[data.Family].TracesFor(i, data.AllGT) + + switch i { + case -2: + data.BucketStr = "errors" + case -1: + data.BucketStr = "active" + default: + data.BucketStr = buckets[i].String() + } + } + } + + if lat := req.FormValue("lat"); data.Family != "" && lat != "" { + data.Latencies = data.FamTraces[data.Family].Latencies() + } + + if traceID := req.FormValue("trace"); traceID != "" { + refID := req.FormValue("ref") + tr := findInFamilies(id(traceID), id(refID)) + if tr == nil { + data.Error = "Trace not found" + w.WriteHeader(http.StatusNotFound) + goto render + } + data.Trace = tr + data.Family = tr.Family + data.AllEvents = allEvents(tr) + } + +render: + + // Write into a buffer, to avoid accidentally holding a lock on http + // writes. It shouldn't happen, but just to be extra safe. + bw := &bytes.Buffer{} + bw.Grow(16 * 1024) + err := top.ExecuteTemplate(bw, "index.html.tmpl", data) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + panic(err) + } + + w.Write(bw.Bytes()) +} + +type traceAndEvent struct { + Trace *trace + Event event + Depth uint +} + +// allEvents gets all the events for the trace and its children/linked traces; +// and returns them sorted by timestamp. +func allEvents(tr *trace) []traceAndEvent { + // Map tracking all traces we've seen, to avoid loops. + seen := map[id]bool{} + + // Recursively gather all events. + evts := appendAllEvents(tr, []traceAndEvent{}, seen, 0) + + // Sort them by time. + sort.Slice(evts, func(i, j int) bool { + return evts[i].Event.When.Before(evts[j].Event.When) + }) + + return evts +} + +func appendAllEvents(tr *trace, evts []traceAndEvent, seen map[id]bool, depth uint) []traceAndEvent { + if seen[tr.ID] { + return evts + } + seen[tr.ID] = true + + subTraces := []*trace{} + + // Append all events of this trace. + trevts := tr.Events() + for _, e := range trevts { + evts = append(evts, traceAndEvent{tr, e, depth}) + if e.Ref != nil { + subTraces = append(subTraces, e.Ref) + } + } + + for _, t := range subTraces { + evts = appendAllEvents(t, evts, seen, depth+1) + } + + return evts +} + +func stripZeros(d time.Duration) string { + if d < time.Second { + _, frac := math.Modf(d.Seconds()) + return fmt.Sprintf(" .%6d", int(frac*1000000)) + } + return fmt.Sprintf("%.6f", d.Seconds()) +} + +func roundSeconds(d time.Duration) string { + return fmt.Sprintf("%.6f", d.Seconds()) +} + +func roundDuration(d time.Duration) time.Duration { + return d.Round(time.Millisecond) +} + +func colorize(depth uint, id id) template.CSS { + if depth == 0 { + return template.CSS("rgba(var(--text-color))") + } + + if depth > 3 { + depth = 3 + } + + // Must match the number of nested color variables in the CSS. + colori := crc32.ChecksumIEEE([]byte(id)) % 6 + return template.CSS( + fmt.Sprintf("var(--nested-d%02d-c%02d)", depth, colori)) +} + +func depthspan(depth uint) template.HTML { + s := `<span class="depth">` + switch depth { + case 0: + case 1: + s += "· " + case 2: + s += "· · " + case 3: + s += "· · · " + case 4: + s += "· · · · " + default: + s += fmt.Sprintf("· (%d) · ", depth) + } + + s += `</span>` + return template.HTML(s) +} + +// Hand-picked emojis that have enough visual differences in most common +// renderings, and are common enough to be able to easily describe them. +var emojids = []rune(`😀🤣😇🥰🤧😈🤡👻👽🤖👋✊🦴👅` + + `🐒🐕🦊🐱🐯🐎🐄🐷🐑🐐🐪🦒🐘🐀🦇🐓🦆🦚🦜🐢🐍🦖🐋🐟🦈🐙` + + `🦋🐜🐝🪲🌻🌲🍉🍌🍍🍎🍑🥕🍄` + + `🧀🍦🍰🧉🚂🚗🚜🛵🚲🛼🪂🚀🌞🌈🌊⚽`) + +func shorttitle(tr *trace) string { + all := tr.Family + " - " + tr.Title + if len(all) > 20 { + all = "..." + all[len(all)-17:] + } + return all +} + +func traceemoji(id id) string { + i := crc32.ChecksumIEEE([]byte(id)) % uint32(len(emojids)) + return string(emojids[i]) +} diff --git a/internal/nettrace/http_test.go b/internal/nettrace/http_test.go new file mode 100644 index 0000000..24c82f6 --- /dev/null +++ b/internal/nettrace/http_test.go @@ -0,0 +1,255 @@ +package nettrace + +import ( + "fmt" + "io" + "net/http" + "net/http/httptest" + "net/url" + "strings" + "testing" + "time" +) + +func getValues(t *testing.T, vs url.Values, code int) string { + t.Helper() + + req := httptest.NewRequest("GET", "/debug/traces?"+vs.Encode(), nil) + w := httptest.NewRecorder() + RenderTraces(w, req) + + resp := w.Result() + body, _ := io.ReadAll(resp.Body) + + if resp.StatusCode != code { + t.Errorf("expected %d, got %v", code, resp) + } + + return string(body) +} + +type v struct { + fam, b, lat, trace, ref, all string +} + +func getCode(t *testing.T, vs v, code int) string { + t.Helper() + + u := url.Values{} + if vs.fam != "" { + u.Add("fam", vs.fam) + } + if vs.b != "" { + u.Add("b", vs.b) + } + if vs.lat != "" { + u.Add("lat", vs.lat) + } + if vs.trace != "" { + u.Add("trace", vs.trace) + } + if vs.ref != "" { + u.Add("ref", vs.ref) + } + if vs.all != "" { + u.Add("all", vs.all) + } + + return getValues(t, u, code) +} + +func get(t *testing.T, fam, b, lat, trace, ref, all string) string { + t.Helper() + return getCode(t, v{fam, b, lat, trace, ref, all}, 200) +} + +func getErr(t *testing.T, fam, b, lat, trace, ref, all string, code int, err string) string { + t.Helper() + body := getCode(t, v{fam, b, lat, trace, ref, all}, code) + if !strings.Contains(body, err) { + t.Errorf("Body does not contain error message %q", err) + t.Logf("Body: %v", body) + } + + return body +} + +func checkContains(t *testing.T, body, s string) { + t.Helper() + if !strings.Contains(body, s) { + t.Errorf("Body does not contain %q", s) + t.Logf("Body: %v", body) + } +} + +func TestHTTP(t *testing.T) { + tr := New("TestHTTP", "http") + tr.Printf("entry #1") + tr.Finish() + + tr = New("TestHTTP", "http") + tr.Printf("entry #2") + tr.Finish() + + tr = New("TestHTTP", "http") + tr.Errorf("entry #3 (error)") + tr.Finish() + + tr = New("TestHTTP", "http") + tr.Printf("hola marola") + tr.Printf("entry #4") + // This one is active until the end. + defer tr.Finish() + + // Get the plain index. + body := get(t, "", "", "", "", "", "") + checkContains(t, body, "TestHTTP") + + // Get a specific family, but no bucket. + body = get(t, "TestHTTP", "", "", "", "", "") + checkContains(t, body, "TestHTTP") + + // Get a family and active bucket. + body = get(t, "TestHTTP", "-1", "", "", "", "") + checkContains(t, body, "hola marola") + + // Get a family and error bucket. + body = get(t, "TestHTTP", "-2", "", "", "", "") + checkContains(t, body, "entry #3 (error)") + + // Get a family and first bucket. + body = get(t, "TestHTTP", "0", "", "", "", "") + checkContains(t, body, "entry #2") + + // Latency view. There are 3 events because the 4th is active. + body = get(t, "TestHTTP", "", "lat", "", "", "") + checkContains(t, body, "Count: 3") + + // Get a specific trace. No family given, since it shouldn't be needed (we + // take it from the id). + body = get(t, "", "", "", string(tr.(*trace).ID), "", "") + checkContains(t, body, "hola marola") + + // Check the "all=true" views. + body = get(t, "TestHTTP", "0", "", "", "", "true") + checkContains(t, body, "entry #2") + checkContains(t, body, "?fam=TestHTTP&b=-2&all=true") + + tr.Finish() +} + +func TestHTTPLong(t *testing.T) { + // Test a long trace. + tr := New("TestHTTPLong", "verbose") + for i := 0; i < 1000; i++ { + tr.Printf("entry #%d", i) + } + tr.Finish() + get(t, "TestHTTPLong", "", "", string(tr.(*trace).ID), "", "") +} + +func TestHTTPErrors(t *testing.T) { + tr := New("TestHTTPErrors", "http") + tr.Printf("entry #1") + tr.Finish() + + // Unknown family. + getErr(t, "unkfamily", "", "", "", "", "", + 404, "Unknown family") + + // Invalid bucket. + getErr(t, "TestHTTPErrors", "abc", "", "", "", "", + 400, "Invalid bucket") + getErr(t, "TestHTTPErrors", "-3", "", "", "", "", + 400, "Invalid bucket") + getErr(t, "TestHTTPErrors", "9", "", "", "", "", + 400, "Invalid bucket") + + // Unknown trace id (malformed). + getErr(t, "TestHTTPErrors", "", "", "unktrace", "", "", + 404, "Trace not found") + + // Unknown trace id. + getErr(t, "TestHTTPErrors", "", "", string(tr.(*trace).ID)+"xxx", "", "", + 404, "Trace not found") + + // Check that the trace is actually there. + get(t, "", "", "", string(tr.(*trace).ID), "", "") +} + +func TestHTTPUroboro(t *testing.T) { + trA := New("TestHTTPUroboro", "trA") + defer trA.Finish() + trA.Printf("this is trace A") + + trB := New("TestHTTPUroboro", "trB") + defer trB.Finish() + trB.Printf("this is trace B") + + trA.Link(trB, "B is my friend") + trB.Link(trA, "A is my friend") + + // Check that we handle cross-linked events well. + get(t, "TestHTTPUroboro", "", "", "", "", "") + get(t, "TestHTTPUroboro", "-1", "", "", "", "") + get(t, "", "", "", string(trA.(*trace).ID), "", "") + get(t, "", "", "", string(trB.(*trace).ID), "", "") +} + +func TestHTTPDeep(t *testing.T) { + tr := New("TestHTTPDeep", "level-0") + defer tr.Finish() + ts := []Trace{tr} + for i := 1; i <= 9; i++ { + tr = tr.NewChild("TestHTTPDeep", fmt.Sprintf("level-%d", i)) + defer tr.Finish() + ts = append(ts, tr) + } + + // Active view. + body := get(t, "TestHTTPDeep", "-1", "", "", "", "") + checkContains(t, body, "level-9") + + // Recursive view. + body = get(t, "TestHTTPDeep", "", "", string(ts[0].(*trace).ID), "", "") + checkContains(t, body, "level-9") +} + +func TestStripZeros(t *testing.T) { + cases := []struct { + d time.Duration + exp string + }{ + {0 * time.Second, " . 0"}, + {1 * time.Millisecond, " . 1000"}, + {5 * time.Millisecond, " . 5000"}, + {1 * time.Second, "1.000000"}, + {1*time.Second + 8*time.Millisecond, "1.008000"}, + } + for _, c := range cases { + if got := stripZeros(c.d); got != c.exp { + t.Errorf("stripZeros(%s) got %q, expected %q", + c.d, got, c.exp) + } + } +} + +func TestRegisterHandler(t *testing.T) { + mux := http.NewServeMux() + RegisterHandler(mux) + + req := httptest.NewRequest("GET", "/debug/traces", nil) + w := httptest.NewRecorder() + mux.ServeHTTP(w, req) + + resp := w.Result() + + if resp.StatusCode != 200 { + t.Errorf("expected 200, got %v", resp) + } + + body, _ := io.ReadAll(resp.Body) + if !strings.Contains(string(body), "<h1>Traces</h1>") { + t.Errorf("unexpected body: %s", body) + } +} diff --git a/internal/nettrace/templates/_latency.html.tmpl b/internal/nettrace/templates/_latency.html.tmpl new file mode 100644 index 0000000..07c026a --- /dev/null +++ b/internal/nettrace/templates/_latency.html.tmpl @@ -0,0 +1,25 @@ +<table class="latencies"><tr> +<td>Count: {{.Latencies.Count}}</td> +<td>Avg: {{.Latencies.Avg | roundDuration}}</td> +<td>Min: {{.Latencies.Min | roundDuration}}</td> +<td>Max: {{.Latencies.Max | roundDuration}}</td> +</tr></table> +<p> + +<table class="latencies"> +<tr><th>Bucket</th><th>Count</th><th>%</th><th></th><th>Cumulative</th></tr> +{{range .Latencies.Counts}} +<tr> + <td> + <a href="?fam={{$.Family}}&b={{.BucketIdx}}" + {{if eq .Count 0}}class="muted"{{end}}> + ≥{{.Start}}</a> + </td> + <td>{{.Count}}</td> + <td>{{.Percent | printf "%5.2f"}}%</td> + <td><meter max="100" value="{{.Percent}}"> + {{.Percent | printf "%.2f"}}%</meter> + <td>{{.CumPct | printf "%5.2f"}}%</td> +</tr> +{{end}} +</table> diff --git a/internal/nettrace/templates/_recursive.html.tmpl b/internal/nettrace/templates/_recursive.html.tmpl new file mode 100644 index 0000000..178624d --- /dev/null +++ b/internal/nettrace/templates/_recursive.html.tmpl @@ -0,0 +1,51 @@ +{{if .Trace.Parent}} +<a href="?trace={{.Trace.Parent.ID}}&ref={{.Trace.ID}}"> + Parent: {{.Trace.Parent.Family}} - {{.Trace.Parent.Title}}</a> +<p> +{{end}} + +<table class="trace"> + +<thead> +<tr> + <th>Trace</th> + <th>Timestamp</th> + <th>Elapsed (s)</th> + <th></th> + <th>Message</th> +</tr> +</thead> + +<tbody> +{{$prev := .Trace.Start}} +{{range .AllEvents}} +<tr style="background: {{colorize .Depth .Trace.ID}};"> +<td title='{{.Trace.Family}} - {{.Trace.Title}} +@ {{.Trace.Start.Format "2006-01-02 15:04:05.999999" }}'> + {{shorttitle .Trace}}</td> + +<td class="when">{{.Event.When.Format "15:04:05.000000"}}</td> +<td class="duration">{{(.Event.When.Sub $prev) | stripZeros}}</td> +<td class="emoji" title='{{.Trace.Family}} - {{.Trace.Title}} +@ {{.Trace.Start.Format "2006-01-02 15:04:05.999999" }}'> + <div class="emoji">{{traceemoji .Trace.ID}}</div></td> +<td class="msg"> + {{- depthspan .Depth -}} + {{- if .Event.Type.IsLog -}} + {{.Event.Msg}} + {{- else if .Event.Type.IsChild -}} + new child: <a href="?trace={{.Event.Ref.ID}}&ref={{.Trace.ID}}">{{.Event.Ref.Family}} - {{.Event.Ref.Title}}</a> + {{- else if .Event.Type.IsLink -}} + <a href="?trace={{.Event.Ref.ID}}&ref={{.Trace.ID}}">{{.Event.Msg}}</a> + {{- else if .Event.Type.IsDrop -}} + <b><i>[ events dropped ]</i></b> + {{- else -}} + <i>[ unknown event type ]</i> + {{- end -}} +</td> +</tr> +{{$prev = .Event.When}} +{{end}} + +</tbody> +</table> diff --git a/internal/nettrace/templates/_single.html.tmpl b/internal/nettrace/templates/_single.html.tmpl new file mode 100644 index 0000000..2e75464 --- /dev/null +++ b/internal/nettrace/templates/_single.html.tmpl @@ -0,0 +1,35 @@ +<tr class="title"> +<td class="when">{{.Start.Format "2006-01-02 15:04:05.000000"}}</td> +<td class="duration">{{.Duration | roundSeconds}}</td> +<td><a href="?trace={{.ID}}">{{.Title}}</a> + {{if .Parent}}(parent: <a href="?trace={{.Parent.ID}}&ref={{.ID}}"> + {{.Parent.Family}} - {{.Parent.Title}}</a>) + {{end}} +</td> +<tr> + +{{$prev := .Start}} +{{range .Events}} +<tr> +<td class="when">{{.When.Format "15:04:05.000000"}}</td> +<td class="duration">{{(.When.Sub $prev) | stripZeros}}</td> +<td class="msg"> + {{- if .Type.IsLog -}} + {{.Msg}} + {{- else if .Type.IsChild -}} + new child <a href="?trace={{.Ref.ID}}&ref={{$.ID}}">{{.Ref.Family}} {{.Ref.Title}}</a> + {{- else if .Type.IsLink -}} + <a href="?trace={{.Ref.ID}}&ref={{$.ID}}">{{.Msg}}</a> + {{- else if .Type.IsDrop -}} + <i>[ events dropped ]</i> + {{- else -}} + <i>[ unknown event type ]</i> + {{- end -}} +</td> +</tr> +{{$prev = .When}} +{{end}} + +<tr> +<td> </td> +</tr> diff --git a/internal/nettrace/templates/index.html.tmpl b/internal/nettrace/templates/index.html.tmpl new file mode 100644 index 0000000..8909c34 --- /dev/null +++ b/internal/nettrace/templates/index.html.tmpl @@ -0,0 +1,129 @@ +<!DOCTYPE html> +<html lang="en"> +<head> +<style> +{{template "style.css"}} +</style> + +<title> +{{if .Trace}}{{.Trace.Family}} - {{.Trace.Title}} +{{else if .BucketStr}}{{.Family}} - {{.BucketStr}} +{{else if .Latencies}}{{.Family}} - latency +{{else}}Traces +{{end}} +</title> + +</head> + +<body> +<h1>Traces</h1> + +<table class="index"> +{{range $name, $ftr := .FamTraces}} +<tr> + <td class="family"> + <a href="?fam={{$name}}&b=0&all=true"> + {{if eq $name $.Family}}<u>{{end}} + {{$name}} + {{if eq $name $.Family}}</u>{{end}} + </a> + </td> + + <td class="bucket active"> + {{$n := $ftr.LenActive}} + {{if and (eq $name $.Family) (eq "active" $.BucketStr)}}<u>{{end}} + + <a href="?fam={{$name}}&b=-1&all={{$.AllGT}}" + {{if eq $n 0}}class="muted"{{end}}> + {{$n}} active</a> + + {{if and (eq $name $.Family) (eq "active" $.BucketStr)}}</u>{{end}} + </td> + + {{range $i, $b := $.Buckets}} + <td class="bucket"> + {{$n := $ftr.LenBucket $i}} + {{if and (eq $name $.Family) (eq $b.String $.BucketStr)}}<u>{{end}} + + <a href="?fam={{$name}}&b={{$i}}&all={{$.AllGT}}" + {{if eq $n 0}}class="muted"{{end}}> + ≥{{$b}}</a> + + {{if and (eq $name $.Family) (eq $b.String $.BucketStr)}}</u>{{end}} + </td> + {{end}} + + <td class="bucket"> + {{$n := $ftr.LenErrors}} + {{if and (eq $name $.Family) (eq "errors" $.BucketStr)}}<u>{{end}} + + <a href="?fam={{$name}}&b=-2&all={{$.AllGT}}" + {{if eq $n 0}}class="muted"{{end}}> + errors</a> + + {{if and (eq $name $.Family) (eq "errors" $.BucketStr)}}</u>{{end}} + </td> + + <td class="bucket"> + <a href="?fam={{$name}}&lat=true&all={{$.AllGT}}">[latency]</a> + </td> +</tr> +{{end}} +</table> +<br> +Show: <a href="?fam={{.Family}}&b={{.Bucket}}&all=false"> + {{if not .AllGT}}<u>{{end}} + Only in bucket</a> + {{if not .AllGT}}</u>{{end}} +/ +<a href="?fam={{.Family}}&b={{.Bucket}}&all=true"> + {{if .AllGT}}<u>{{end}} + All ≥ bucket</a> + {{if .AllGT}}</u>{{end}} +<p> + +<!---------------------------------------------> +{{if .Error}} +<p class="error">Error: {{.Error}}</p> +{{end}} + +<!---------------------------------------------> +{{if .BucketStr}} +<h2>{{.Family}} - {{.BucketStr}}</h2> + +<table class="trace"> +<thead> +<tr> + <th>Timestamp</th> + <th>Elapsed (s)</th> + <th>Message</th> +</tr> +</thead> +<tbody> +<tr> +<td> </td> +</tr> +{{range .Traces}} +{{template "_single.html.tmpl" .}}<p> +{{end}} +</tbody> +</table> + +<p> +{{end}} + +<!---------------------------------------------> +{{if .Latencies}} +<h2>{{.Family}} - latency</h2> +{{template "_latency.html.tmpl" .}}<p> +{{end}} + +<!---------------------------------------------> +{{if .Trace}} +<h2>{{.Trace.Family}} - <i>{{.Trace.Title}}</i></h2> +{{template "_recursive.html.tmpl" .}}<p> +{{end}} + +</body> + +</html> diff --git a/internal/nettrace/templates/style.css b/internal/nettrace/templates/style.css new file mode 100644 index 0000000..077db1e --- /dev/null +++ b/internal/nettrace/templates/style.css @@ -0,0 +1,187 @@ +:root { + --text-color: black; + --bg-color: #fffff7; + --zebra-bg-color: #eeeee7; + --muted-color: #444; + --xmuted-color: #a1a1a1; + --link-color: #39c; + --link-hover: #069; + --underline-color: grey; + --error-color: red; + + /* Colors for the nested zebras. */ + --nested-d01-c00: #ffebee; + --nested-d01-c01: #ede7f6; + --nested-d01-c02: #e3f2fd; + --nested-d01-c03: #e8f5e9; + --nested-d01-c04: #fff8e1; + --nested-d01-c05: #efebe9; + --nested-d02-c00: #f0dcdf; + --nested-d02-c01: #ded8e7; + --nested-d02-c02: #d4e3ee; + --nested-d02-c03: #d9e6da; + --nested-d02-c04: #f0e9d2; + --nested-d02-c05: #e0dcda; + --nested-d03-c00: #e1cdd0; + --nested-d03-c01: #cfc9d8; + --nested-d03-c02: #c5d4df; + --nested-d03-c03: #cad7cb; + --nested-d03-c04: #e1dac3; + --nested-d03-c05: #d1cdcb; +} + +@media (prefers-color-scheme: dark) { + :root { + --text-color: rgba(255, 255, 255, 0.90); + --bg-color: #121212; + --zebra-bg-color: #222222; + --muted-color: #aaaaaa; + --xmuted-color: #a1a1a1; + --link-color: #44b4ec; + --link-hover: #7fc9ee; + --underline-color: grey; + --error-color: #dd4747; + + /* Colors for the nested zebras. */ + --nested-d01-c00: #220212; + --nested-d01-c01: #1c1c22; + --nested-d01-c02: #001e20; + --nested-d01-c03: #0f0301; + --nested-d01-c04: #201d06; + --nested-d01-c05: #00192b; + --nested-d02-c00: #311121; + --nested-d02-c01: #2b2b31; + --nested-d02-c02: #0f2d2f; + --nested-d02-c03: #1e1210; + --nested-d02-c04: #2f2c15; + --nested-d02-c05: #0f283a; + --nested-d03-c00: #402030; + --nested-d03-c01: #3a3a40; + --nested-d03-c02: #1e3c3e; + --nested-d03-c03: #2d211f; + --nested-d03-c04: #3e3b24; + --nested-d03-c05: #1e3749; + } +} + +body { + font-family: sans-serif; + color: var(--text-color); + background: var(--bg-color); +} + +p.error { + color: var(--error-color); + font-size: large; +} + +a { + color: var(--link-color); + text-decoration: none; +} + +a:hover { + color: var(--link-hover); +} + +.family a { + color: var(--text-color); +} + +u { + text-decoration-color: var(--underline-color); +} + +table.index { + border-collapse: collapse; +} + +table.index tr:nth-child(odd) { + + background-color: var(--zebra-bg-color); +} + +table.index td { + padding: 0.25em 0.5em; +} + +table.index td.bucket { + min-width: 2em; + text-align: center; +} + +table.index td.active { + /* Make the "active" column wider so there's less jumping on refresh. */ + min-width: 5em; + text-align: right; +} + +table.index a { + text-decoration: none; +} + +a.muted { + color: var(--muted-color); +} + +table.trace { + font-family: monospace; + border-collapse: collapse; +} + +table.trace thead { + border-bottom: 1px solid var(--text-color); +} + +table.trace th { + text-align: left; + padding: 0.1em 1em; +} + +table.trace tr.title { + font-weight: bold; +} + +table.trace td { + padding: 0.1em 1em; +} + +table.trace td.when { + text-align: right; +} + +table.trace td.duration { + text-align: right; + white-space: pre; +} + +table.trace td.msg { + white-space: pre-wrap; +} + +span.depth { + color: var(--xmuted-color); +} + +div.emoji { + /* Emojis sometimes are rendered excessively tall. */ + /* This ensures they're sized appropriately. */ + max-height: 1.3em; + overflow: hidden; +} + +table.latencies { + text-align: right; +} + +table.latencies td { + padding: 0 0.3em; +} + +table.latencies th { + text-align: center; +} + +meter { + width: 15em; +} diff --git a/internal/nettrace/test_server.go b/internal/nettrace/test_server.go new file mode 100644 index 0000000..a25d40a --- /dev/null +++ b/internal/nettrace/test_server.go @@ -0,0 +1,243 @@ +//go:build ignore + +package main + +import ( + "flag" + "fmt" + "math/rand" + "net/http" + "time" + + _ "net/http/pprof" + + "blitiri.com.ar/go/dnss/internal/nettrace" +) + +func main() { + addr := flag.String("addr", ":8080", "listening address") + flag.Parse() + + go RandomEvents("random-one") + go RandomEvents("random-two") + go RandomEvents("random-three") + go RandomLongEvent("random-long", "long-one") + go RandomLongEvent("random-long", "long-two") + go RandomLongEvent("random-long", "long-three") + go RandomBunny("random-bunny", "first 🐇") + go RandomNested("random-nested") + go RandomLazy("random-lazy") + + http.DefaultServeMux.Handle("/", + WithLogging(http.HandlerFunc(HandleRoot))) + + http.DefaultServeMux.Handle("/debug/traces", + WithLogging(http.HandlerFunc(nettrace.RenderTraces))) + + fmt.Printf("listening on %s\n", *addr) + http.ListenAndServe(*addr, nil) +} + +func RandomEvents(family string) { + for i := 0; ; i++ { + tr := nettrace.New(family, fmt.Sprintf("evt-%d", i)) + randomTrace(family, tr) + } +} + +func randomTrace(family string, tr nettrace.Trace) { + tr.Printf("this is a random event") + tr.Printf("and it has a random delay") + delay := time.Duration(rand.Intn(1000)) * time.Millisecond + tr.Printf("sleeping %v", delay) + time.Sleep(delay) + + if rand.Intn(100) < 1 { + tr.Printf("this unlucky one is an error") + tr.SetError() + } + + if rand.Intn(100) < 10 { + tr.Printf("this one got super verbose!") + for j := 0; j < 100; j++ { + tr.Printf("message %d", j) + } + } + + if rand.Intn(100) < 30 { + tr.Printf("this one had a child") + c := tr.NewChild(family, "achild") + go randomTrace(family, c) + } + + tr.Printf("all done") + tr.Finish() +} + +func RandomLongEvent(family, title string) { + tr := nettrace.New(family, title) + tr.Printf("this is a random long event") + for i := 0; ; i++ { + delay := time.Duration(rand.Intn(100)) * time.Millisecond + time.Sleep(delay) + tr.Printf("message %d, slept %v", i, delay) + } + tr.Finish() +} + +func RandomBunny(family, title string) { + tr := nettrace.New(family, title) + tr.SetMaxEvents(100) + tr.Printf("this is the top 🐇") + for i := 0; ; i++ { + delay := time.Duration(rand.Intn(100)) * time.Millisecond + time.Sleep(delay) + tr.Printf("message %d, slept %v", i, delay) + + if rand.Intn(100) < 40 { + c := tr.NewChild(family, fmt.Sprintf("child-%d", i)) + go randomTrace(family, c) + } + + if rand.Intn(100) < 40 { + n := nettrace.New(family, fmt.Sprintf("linked-%d", i)) + go randomTrace(family, n) + tr.Link(n, "linking with this guy") + } + } + tr.Finish() +} + +func randomNested(family string, depth int, parent nettrace.Trace) { + tr := parent.NewChild(family, fmt.Sprintf("nested-%d", depth)) + defer tr.Finish() + + tr.Printf("I am a spoiled child") + + delay := time.Duration(rand.Intn(100)) * time.Millisecond + time.Sleep(delay) + tr.Printf("slept %v", delay) + + if depth > 10 { + tr.Printf("I went too far") + return + } + + // If we make this < 50, then it grows forever. + if rand.Intn(100) < 75 { + tr.Printf("I sang really well") + return + } + + max := rand.Intn(5) + for i := 0; i < max; i++ { + tr.Printf("spawning %d", i) + go randomNested(family, depth+1, tr) + } + +} +func RandomNested(family string) { + tr := nettrace.New(family, "nested-0") + for i := 0; ; i++ { + randomNested(family, 1, tr) + } +} + +func RandomLazy(family string) { + for i := 0; ; i++ { + tr := nettrace.New(family, fmt.Sprintf("evt-%d", i)) + tr.Printf("I am very lazy and do little work") + tr.Finish() + time.Sleep(500 * time.Millisecond) + } +} + +func HandleRoot(w http.ResponseWriter, r *http.Request) { + if delayS := r.FormValue("delay"); delayS != "" { + delay, err := time.ParseDuration(delayS) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + time.Sleep(delay) + } + + if withError := r.FormValue("error"); withError != "" { + tr, _ := nettrace.FromContext(r.Context()) + tr.SetError() + } + + w.Write([]byte(rootHTML)) +} + +func WithLogging(parent http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + tr := nettrace.New("http", r.URL.String()) + defer tr.Finish() + + // Associate the trace with this request. + r = r.WithContext(nettrace.NewContext(r.Context(), tr)) + + tr.Printf("%s %s %s %s", + r.RemoteAddr, r.Proto, r.Method, r.URL.String()) + start := time.Now() + parent.ServeHTTP(w, r) + latency := time.Since(start) + tr.Printf("handler took %s", latency) + + // 1.2.3.4:34575 HTTP/2.0 GET /path = 1.2ms + fmt.Printf("%s %s %s %s = %s\n", + r.RemoteAddr, r.Proto, r.Method, r.URL.String(), + latency) + }) +} + +const rootHTML = ` +<html> +<head> +<style> + td { + min-width: 2em; + text-align: center; + } + input#delay { + width: 5em; + } +</style> +</head> +<body> + +<a href="/debug/traces">Traces</a><p> + +<table> + <tr> + <td>Delay:</td> + <td><a href="/?delay=0s">0s</a></td> + <td><a href="/?delay=0.1s">0.1s</a></td> + <td><a href="/?delay=0.2s">0.2s</a></td> + <td><a href="/?delay=0.5s">0.5s</a></td> + <td><a href="/?delay=1s">1s</a></td> + </tr> + + <tr> + <td>+ error:</td> + <td><a href="/?delay=0&error=on">0s</a></td> + <td><a href="/?delay=0.1s&error=on">0.1s</a></td> + <td><a href="/?delay=0.2s&error=on">0.2s</a></td> + <td><a href="/?delay=0.5s&error=on">0.5s</a></td> + <td><a href="/?delay=1s&error=on">1s</a></td> + </tr> +</table> + +<form action="/" method="get"> + <label for="delay">Custom delay:</label> + <input type="text" id="delay" name="delay" placeholder="250ms" + autofocus required> + <input type="checkbox" id="error" name="error"> + <label for="error">is error</label> + <input type="submit"> +</form> + +</body> +</html> +` diff --git a/internal/nettrace/trace.go b/internal/nettrace/trace.go new file mode 100644 index 0000000..7013e48 --- /dev/null +++ b/internal/nettrace/trace.go @@ -0,0 +1,558 @@ +// Package nettrace implements tracing of requests. Traces are created by +// nettrace.New, and can then be viewed over HTTP on /debug/traces. +package nettrace + +import ( + "container/ring" + "fmt" + "math/rand" + "sort" + "strconv" + "strings" + "sync" + "time" +) + +// IDs are of the form "family!timestamp!unique identifier", which allows for +// sorting them by time much easily, and also some convenient optimizations +// when looking up an id across all the known ones. +// Family is not escaped. It should not contain the separator. +// It is not expected to be stable, for internal use only. +type id string + +func newID(family string, ts int64) id { + return id( + family + "!" + + strconv.FormatInt(ts, 10) + "!" + + strconv.FormatUint(rand.Uint64(), 10)) +} + +func (id id) Family() string { + sp := strings.SplitN(string(id), "!", 2) + if len(sp) != 2 { + return string(id) + } + return sp[0] +} + +// Trace represents a single request trace. +type Trace interface { + // NewChild creates a new trace, that is a child of this one. + NewChild(family, title string) Trace + + // Link to another trace with the given message. + Link(other Trace, msg string) + + // SetMaxEvents sets the maximum number of events that will be stored in + // the trace. It must be called right after initialization. + SetMaxEvents(n int) + + // SetError marks that the trace was for an error event. + SetError() + + // Printf adds a message to the trace. + Printf(format string, a ...interface{}) + + // Errorf adds a message to the trace, marks it as an error, and returns + // an error for it. + Errorf(format string, a ...interface{}) error + + // Finish marks the trace as complete. + // The trace should not be used after calling this method. + Finish() +} + +// A single trace. Can be active or inactive. +// Exported fields are allowed to be accessed directly, e.g. by the HTTP +// handler. Private ones are mutex protected. +type trace struct { + ID id + + Family string + Title string + + Parent *trace + + Start time.Time + + // Fields below are mu-protected. + // We keep them unexported so they're not accidentally accessed in html + // templates. + mu sync.Mutex + + end time.Time + + isError bool + maxEvents int + + // We keep two separate groups: the first ~1/3rd events in a simple slice, + // and the last 2/3rd in a ring so we can drop events without losing the + // first ones. + cutoff int + firstEvents []event + lastEvents *evtRing +} + +type evtType uint8 + +const ( + evtLOG = evtType(1 + iota) + evtCHILD + evtLINK + evtDROP +) + +func (t evtType) IsLog() bool { return t == evtLOG } +func (t evtType) IsChild() bool { return t == evtCHILD } +func (t evtType) IsLink() bool { return t == evtLINK } +func (t evtType) IsDrop() bool { return t == evtDROP } + +type event struct { + When time.Time + Type evtType + + Ref *trace + Msg string +} + +const defaultMaxEvents = 30 + +func newTrace(family, title string) *trace { + start := time.Now() + tr := &trace{ + ID: newID(family, start.UnixNano()), + Family: family, + Title: title, + Start: start, + + maxEvents: defaultMaxEvents, + cutoff: defaultMaxEvents / 3, + } + + // Pre-allocate a couple of events to speed things up. + // Don't allocate lastEvents, that can be expensive and it is not always + // needed. No need to slow down trace creation just for it. + tr.firstEvents = make([]event, 0, 4) + + familiesMu.Lock() + ft, ok := families[family] + if !ok { + ft = newFamilyTraces() + families[family] = ft + } + familiesMu.Unlock() + + ft.mu.Lock() + ft.active[tr.ID] = tr + ft.mu.Unlock() + + return tr +} + +// New creates a new trace with the given family and title. +func New(family, title string) Trace { + return newTrace(family, title) +} + +func (tr *trace) append(evt *event) { + tr.mu.Lock() + defer tr.mu.Unlock() + + if len(tr.firstEvents) < tr.cutoff { + tr.firstEvents = append(tr.firstEvents, *evt) + return + } + + if tr.lastEvents == nil { + // The ring holds the last 2/3rds of the events. + tr.lastEvents = newEvtRing(tr.maxEvents - tr.cutoff) + } + + tr.lastEvents.Add(evt) +} + +// String is for debugging only. +func (tr *trace) String() string { + return fmt.Sprintf("trace{%s, %s, %q, %d}", + tr.Family, tr.Title, tr.ID, len(tr.Events())) +} + +func (tr *trace) NewChild(family, title string) Trace { + c := newTrace(family, title) + c.Parent = tr + + // Add the event to the parent. + evt := &event{ + When: time.Now(), + Type: evtCHILD, + Ref: c, + } + tr.append(evt) + + return c +} + +func (tr *trace) Link(other Trace, msg string) { + evt := &event{ + When: time.Now(), + Type: evtLINK, + Ref: other.(*trace), + Msg: msg, + } + tr.append(evt) +} + +func (tr *trace) SetMaxEvents(n int) { + // Set a minimum of 6, so the truncation works without running into + // issues. + if n < 6 { + n = 6 + } + tr.mu.Lock() + tr.maxEvents = n + tr.cutoff = n / 3 + tr.mu.Unlock() +} + +func (tr *trace) SetError() { + tr.mu.Lock() + tr.isError = true + tr.mu.Unlock() +} + +func (tr *trace) Printf(format string, a ...interface{}) { + evt := &event{ + When: time.Now(), + Type: evtLOG, + Msg: fmt.Sprintf(format, a...), + } + + tr.append(evt) +} + +func (tr *trace) Errorf(format string, a ...interface{}) error { + tr.SetError() + err := fmt.Errorf(format, a...) + tr.Printf(err.Error()) + return err +} + +func (tr *trace) Finish() { + tr.mu.Lock() + tr.end = time.Now() + tr.mu.Unlock() + + familiesMu.Lock() + ft := families[tr.Family] + familiesMu.Unlock() + ft.finalize(tr) +} + +// Duration of this trace. +func (tr *trace) Duration() time.Duration { + tr.mu.Lock() + start, end := tr.Start, tr.end + tr.mu.Unlock() + + if end.IsZero() { + return time.Since(start) + } + return end.Sub(start) +} + +// Events returns a copy of the trace events. +func (tr *trace) Events() []event { + tr.mu.Lock() + defer tr.mu.Unlock() + + evts := make([]event, len(tr.firstEvents)) + copy(evts, tr.firstEvents) + + if tr.lastEvents == nil { + return evts + } + + if !tr.lastEvents.firstDrop.IsZero() { + evts = append(evts, event{ + When: tr.lastEvents.firstDrop, + Type: evtDROP, + }) + } + + tr.lastEvents.Do(func(e *event) { + evts = append(evts, *e) + }) + + return evts +} + +func (tr *trace) IsError() bool { + tr.mu.Lock() + defer tr.mu.Unlock() + return tr.isError +} + +// +// Trace hierarchy +// +// Each trace belongs to a family. For each family, we have all active traces, +// and then N traces that finished <1s, N that finished <2s, etc. + +// We keep this many buckets of finished traces. +const nBuckets = 8 + +// Buckets to use. Lenght must match nBuckets. +// "Traces with a latency >= $duration". +var buckets = []time.Duration{ + time.Duration(0), + 5 * time.Millisecond, + 10 * time.Millisecond, + 50 * time.Millisecond, + 100 * time.Millisecond, + 300 * time.Millisecond, + 1 * time.Second, + 10 * time.Second, +} + +func findBucket(latency time.Duration) int { + for i, d := range buckets { + if latency >= d { + continue + } + return i - 1 + } + + return nBuckets - 1 +} + +// How many traces we keep per bucket. +const tracesInBucket = 10 + +type traceRing struct { + ring *ring.Ring + max int + l int +} + +func newTraceRing(n int) *traceRing { + return &traceRing{ + ring: ring.New(n), + max: n, + } +} + +func (r *traceRing) Add(tr *trace) { + r.ring.Value = tr + r.ring = r.ring.Next() + if r.l < r.max { + r.l++ + } +} + +func (r *traceRing) Len() int { + return r.l +} + +func (r *traceRing) Do(f func(tr *trace)) { + r.ring.Do(func(x interface{}) { + if x == nil { + return + } + f(x.(*trace)) + }) +} + +type familyTraces struct { + mu sync.Mutex + + // All active ones. + active map[id]*trace + + // The ones we decided to keep. + // Each bucket is a ring-buffer, finishedHead keeps the head pointer. + finished [nBuckets]*traceRing + + // The ones that errored have their own bucket. + errors *traceRing + + // Histogram of latencies. + latencies histogram +} + +func newFamilyTraces() *familyTraces { + ft := &familyTraces{} + ft.active = map[id]*trace{} + for i := 0; i < nBuckets; i++ { + ft.finished[i] = newTraceRing(tracesInBucket) + } + ft.errors = newTraceRing(tracesInBucket) + return ft +} + +func (ft *familyTraces) LenActive() int { + ft.mu.Lock() + defer ft.mu.Unlock() + return len(ft.active) +} + +func (ft *familyTraces) LenErrors() int { + ft.mu.Lock() + defer ft.mu.Unlock() + return ft.errors.Len() +} + +func (ft *familyTraces) LenBucket(b int) int { + ft.mu.Lock() + defer ft.mu.Unlock() + return ft.finished[b].Len() +} + +func (ft *familyTraces) TracesFor(b int, allgt bool) []*trace { + ft.mu.Lock() + defer ft.mu.Unlock() + + trs := []*trace{} + appendTrace := func(tr *trace) { + trs = append(trs, tr) + } + if b == -2 { + ft.errors.Do(appendTrace) + } else if b == -1 { + for _, tr := range ft.active { + appendTrace(tr) + } + } else if b < nBuckets { + ft.finished[b].Do(appendTrace) + if allgt { + for i := b + 1; i < nBuckets; i++ { + ft.finished[i].Do(appendTrace) + } + } + } + + // Sort them by start, newer first. This is the order that will be used + // when displaying them. + sort.Slice(trs, func(i, j int) bool { + return trs[i].Start.After(trs[j].Start) + }) + return trs +} + +func (ft *familyTraces) find(id id) *trace { + ft.mu.Lock() + defer ft.mu.Unlock() + + if tr, ok := ft.active[id]; ok { + return tr + } + + var found *trace + for _, bs := range ft.finished { + bs.Do(func(tr *trace) { + if tr.ID == id { + found = tr + } + }) + if found != nil { + return found + } + } + + ft.errors.Do(func(tr *trace) { + if tr.ID == id { + found = tr + } + }) + if found != nil { + return found + } + + return nil +} + +func (ft *familyTraces) finalize(tr *trace) { + latency := tr.end.Sub(tr.Start) + b := findBucket(latency) + + ft.mu.Lock() + + // Delete from the active list. + delete(ft.active, tr.ID) + + // Add it to the corresponding finished bucket, based on the trace + // latency. + ft.finished[b].Add(tr) + + // Errors go on their own list, in addition to the above. + if tr.isError { + ft.errors.Add(tr) + } + + ft.latencies.Add(b, latency) + + ft.mu.Unlock() +} + +func (ft *familyTraces) Latencies() *histSnapshot { + ft.mu.Lock() + defer ft.mu.Unlock() + return ft.latencies.Snapshot() +} + +// +// Global state +// + +var ( + familiesMu sync.Mutex + families = map[string]*familyTraces{} +) + +func copyFamilies() map[string]*familyTraces { + n := map[string]*familyTraces{} + + familiesMu.Lock() + for f, trs := range families { + n[f] = trs + } + familiesMu.Unlock() + + return n +} + +func findInFamilies(traceID id, refID id) *trace { + // First, try to find it via the family. + family := traceID.Family() + familiesMu.Lock() + fts, ok := families[family] + familiesMu.Unlock() + + if ok { + tr := fts.find(traceID) + if tr != nil { + return tr + } + } + + // If that fail and we have a reference, try finding via it. + // The reference can be a parent or a child. + if refID != id("") { + ref := findInFamilies(refID, "") + if ref == nil { + return nil + } + + // Is the reference's parent the one we're looking for? + if ref.Parent != nil && ref.Parent.ID == traceID { + return ref.Parent + } + + // Try to find it in the ref's events. + for _, e := range ref.Events() { + if e.Ref != nil && e.Ref.ID == traceID { + return e.Ref + } + } + } + return nil +} diff --git a/internal/nettrace/trace_test.go b/internal/nettrace/trace_test.go new file mode 100644 index 0000000..ebdb5c5 --- /dev/null +++ b/internal/nettrace/trace_test.go @@ -0,0 +1,215 @@ +package nettrace + +import ( + "fmt" + "strings" + "testing" + "time" +) + +func expectEvents(t *testing.T, tr Trace, n int) { + t.Helper() + if evts := tr.(*trace).Events(); len(evts) != n { + t.Errorf("expected %d events, got %d", n, len(evts)) + t.Logf("%v", evts) + } +} + +func TestBasic(t *testing.T) { + var tr Trace = New("TestBasic", "basic") + defer tr.Finish() + tr.Printf("hola marola") + tr.Printf("hola marola 2") + + c1 := tr.NewChild("TestBasic", "basic child") + c1.Printf("hijo de la noche") + + expectEvents(t, tr, 3) + + if s := tr.(*trace).String(); !strings.Contains(s, "TestBasic, basic") { + t.Errorf("tr.String does not contain family and title: %q", s) + } +} + +func TestLong(t *testing.T) { + tr := New("TestLong", "long") + defer tr.Finish() + tr.SetMaxEvents(100) + + // First 90 events, no drop. + for i := 0; i < 90; i++ { + tr.Printf("evt %d", i) + } + expectEvents(t, tr, 90) + + // Up to 99, still no drop. + for i := 0; i < 9; i++ { + tr.Printf("evt %d", i) + } + expectEvents(t, tr, 99) + + // Note that we go up to 101 due to rounding errors, we're ok with it. + tr.Printf("evt 100") + expectEvents(t, tr, 100) + tr.Printf("evt 101") + expectEvents(t, tr, 101) + tr.Printf("evt 102") + expectEvents(t, tr, 101) + + // Add more events, expect none of them to exceed 101. + for i := 0; i < 9; i++ { + tr.Printf("evt %d", i) + expectEvents(t, tr, 101) + } +} + +func TestIsError(t *testing.T) { + tr := New("TestIsError", "long") + defer tr.Finish() + if tr.(*trace).IsError() != false { + tr.Errorf("new trace is error") + } + + tr.Errorf("this is an error") + if tr.(*trace).IsError() != true { + tr.Errorf("error not recorded properly") + } +} + +func TestFindViaRef(t *testing.T) { + parent := New("TestFindViaRef", "parent") + parentID := parent.(*trace).ID + defer parent.Finish() + child := parent.NewChild("TestFindViaRef", "child") + childID := child.(*trace).ID + defer child.Finish() + + // Basic check that both can be directly found. + if f := findInFamilies(parentID, id("")); f != parent { + t.Errorf("didn't find parent directly, found: %v", f) + } + if f := findInFamilies(childID, id("")); f != child { + t.Errorf("didn't find child directly, found: %v", f) + } + + // Hackily remove child from the active traces, to force a reference + // lookup when needed. + familiesMu.Lock() + delete(families["TestFindViaRef"].active, child.(*trace).ID) + familiesMu.Unlock() + + // Now the child should not be findable directly anymore. + if f := findInFamilies(childID, id("")); f != nil { + t.Errorf("child should not be findable directly, found: %v", f) + } + + // But we still should be able to get to it via the parent. + if f := findInFamilies(childID, parentID); f != child { + t.Errorf("didn't find child via parent, found: %v", f) + } +} + +func TestMaxEvents(t *testing.T) { + tr := trace{} + + // Test that we keep a minimum, and that the cutoff behaves as expected. + cases := []struct{ me, exp, cutoffExp int }{ + {0, 6, 2}, + {5, 6, 2}, + {6, 6, 2}, + {7, 7, 2}, + {8, 8, 2}, + {9, 9, 3}, + {12, 12, 4}, + } + for _, c := range cases { + tr.SetMaxEvents(c.me) + if got := tr.maxEvents; got != c.exp { + t.Errorf("set max events to %d, expected %d, got %d", + c.me, c.exp, got) + } + if got := tr.cutoff; got != c.cutoffExp { + t.Errorf("set max events to %d, expected cutoff %d, got %d", + c.me, c.cutoffExp, got) + } + } +} + +func TestFind(t *testing.T) { + // Make sure we find the right bucket, including for latencies above the + // last one. + for i, d := range buckets { + found := findBucket(d + 1*time.Millisecond) + if found != i { + t.Errorf("find bucket [%s + 1ms] got %d, expected %d", + d, found, i) + } + } + + // Create a family, populate it with traces in all buckets. + finished := [nBuckets]*trace{} + for i, d := range buckets { + lat := d + 1*time.Millisecond + tr := newTrace("TestFind", fmt.Sprintf("evt-%s", lat)) + tr.end = tr.Start.Add(lat) + families[tr.Family].finalize(tr) + finished[i] = tr + } + + // Also have an active trace. + activeTr := newTrace("TestFind", "active") + + // And add an error trace, which isn't on any of the other buckets (to + // simulate that they've been rotated out of the latency buckets, but are + // still around in errors) + errTr := newTrace("TestFind", "evt-err") + errTr.end = errTr.Start.Add(666 * time.Millisecond) + errTr.SetError() + delete(families[errTr.Family].active, errTr.ID) + families[errTr.Family].errors.Add(errTr) + + // Find all of them. + for i := range buckets { + found := findInFamilies(finished[i].ID, "") + if found != finished[i] { + t.Errorf("finding trace %d on bucket %s, expected %v, got %v", + i, buckets[i], finished[i], found) + } + } + if found := findInFamilies(activeTr.ID, ""); found != activeTr { + t.Errorf("finding active trace, expected %v, got %v", + activeTr, found) + } + if found := findInFamilies(errTr.ID, ""); found != errTr { + t.Errorf("finding error trace, expected %v, got %v", + errTr, found) + } + + // Non-existent traces. + if found := findInFamilies("does!notexist", ""); found != nil { + t.Errorf("finding non-existent trace, expected nil, got %v", found) + } + if found := findInFamilies("does!notexist", "does!notexist"); found != nil { + t.Errorf("finding non-existent trace w/ref, expected nil, got %v", found) + } +} + +func TestFindParent(t *testing.T) { + // Direct parent finding. + // If the ref is the parent, we should find it even if the target trace + // isn't known to the family (e.g. the child is there, but the parent has + // been rotated and is no longer referenced). + + parent := newTrace("TestFindParent", "parent") + child := parent.NewChild("TestFindParent", "child").(*trace) + + // Remove the parent from the active list. + delete(families[parent.Family].active, parent.ID) + + if found := findInFamilies(parent.ID, ""); found != nil { + t.Errorf("finding parent without ref, expected nil, got %v", found) + } + if found := findInFamilies(parent.ID, child.ID); found != parent { + t.Errorf("finding parent with ref, expected %v, got %v", parent, found) + } +} diff --git a/internal/trace/trace.go b/internal/trace/trace.go index ad63270..3bbd651 100644 --- a/internal/trace/trace.go +++ b/internal/trace/trace.go @@ -3,25 +3,15 @@ package trace import ( "fmt" - "net/http" "strconv" "strings" "blitiri.com.ar/go/log" "github.com/miekg/dns" - nettrace "golang.org/x/net/trace" + nettrace "blitiri.com.ar/go/dnss/internal/nettrace" ) -func init() { - // golang.org/x/net/trace has its own authorization which by default only - // allows localhost. This can be confusing and limiting in environments - // which access the monitoring server remotely. - nettrace.AuthRequest = func(req *http.Request) (any, sensitive bool) { - return true, true - } -} - // A Trace represents an active request. type Trace struct { family string @@ -42,14 +32,17 @@ func New(family, title string) *Trace { // Printf adds this message to the trace's log. func (t *Trace) Printf(format string, a ...interface{}) { - t.printf(1, format, a...) + t.t.Printf(format, a...) } -func (t *Trace) printf(n int, format string, a ...interface{}) { - t.t.LazyPrintf(format, a...) +func (t *Trace) lprintf(n int, format string, a ...interface{}) { + t.t.Printf(format, a...) - log.Log(log.Debug, n+1, "%s %s: %s", t.family, t.title, - quote(fmt.Sprintf(format, a...))) + // If -v=3, also log to the main log. + if log.V(3) { + log.Log(log.Debug, n+1, "%s %s: %s", t.family, t.title, + quote(fmt.Sprintf(format, a...))) + } } // Errorf adds this message to the trace's log, with an error level. @@ -57,7 +50,7 @@ func (t *Trace) Errorf(format string, a ...interface{}) error { // Note we can't just call t.Error here, as it breaks caller logging. err := fmt.Errorf(format, a...) t.t.SetError() - t.t.LazyPrintf("error: %v", err) + t.t.Printf("error: %v", err) log.Log(log.Info, 1, "%s %s: error: %s", t.family, t.title, quote(err.Error())) @@ -68,7 +61,7 @@ func (t *Trace) Errorf(format string, a ...interface{}) error { // trace's log. func (t *Trace) Error(err error) error { t.t.SetError() - t.t.LazyPrintf("error: %v", err) + t.t.Printf("error: %v", err) log.Log(log.Info, 1, "%s %s: error: %s", t.family, t.title, quote(err.Error())) @@ -87,11 +80,9 @@ func (t *Trace) Finish() { // Question adds the given question to the trace. func (t *Trace) Question(qs []dns.Question) { - if !log.V(3) { - return + if log.V(1) { + t.lprintf(1, questionsToString(qs)) } - - t.printf(1, questionsToString(qs)) } func questionsToString(qs []dns.Question) string { @@ -105,13 +96,13 @@ func questionsToString(qs []dns.Question) string { // Answer adds the given DNS answer to the trace. func (t *Trace) Answer(m *dns.Msg) { - if !log.V(3) { + if !log.V(1) { return } - t.printf(1, m.MsgHdr.String()) + t.lprintf(1, m.MsgHdr.String()) for _, rr := range m.Answer { - t.printf(1, rr.String()) + t.lprintf(1, rr.String()) } } diff --git a/monitoring.go b/monitoring.go new file mode 100644 index 0000000..5e600c3 --- /dev/null +++ b/monitoring.go @@ -0,0 +1,191 @@ +package main + +import ( + "flag" + "fmt" + "html/template" + "net/http" + "os" + "runtime" + "runtime/debug" + "time" + + "blitiri.com.ar/go/dnss/internal/nettrace" + "blitiri.com.ar/go/log" +) + +var ( + Version = "" + SourceDate = time.Time{} +) + +func init() { + bi, ok := debug.ReadBuildInfo() + if !ok { + panic("unable to read build info") + } + + dirty := false + gitRev := "" + gitTime := "" + for _, s := range bi.Settings { + switch s.Key { + case "vcs.modified": + if s.Value == "true" { + dirty = true + } + case "vcs.time": + gitTime = s.Value + case "vcs.revision": + gitRev = s.Value + } + } + + SourceDate, _ = time.Parse(time.RFC3339, gitTime) + + Version = SourceDate.Format("20060102") + if gitRev != "" { + Version += fmt.Sprintf("-%.9s", gitRev) + } + if dirty { + Version += "-dirty" + } +} + +func monitoringServer(addr string) { + log.Infof("Monitoring HTTP server listening on %s", addr) + + http.HandleFunc("/", debugRoot()) + nettrace.RegisterHandler(http.DefaultServeMux) + + go http.ListenAndServe(addr, nil) +} + +func debugRoot() http.HandlerFunc { + hostname, _ := os.Hostname() + fSet, fUnset := flagsLists() + + indexData := struct { + Hostname string + Version string + GoVersion string + SourceDate time.Time + StartTime time.Time + Args []string + Env []string + FlagsSet []string + FlagsUnset []string + }{ + Hostname: hostname, + Version: Version, + GoVersion: runtime.Version(), + SourceDate: SourceDate, + StartTime: time.Now(), + Args: os.Args, + Env: os.Environ(), + FlagsSet: fSet, + FlagsUnset: fUnset, + } + + return func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/" { + http.NotFound(w, r) + return + } + if err := htmlIndex.Execute(w, indexData); err != nil { + log.Infof("Monitoring handler error: %v", err) + } + } +} + +var tmplFuncs = template.FuncMap{ + "since": time.Since, + "roundDuration": func(d time.Duration) time.Duration { + return d.Round(time.Second) + }, +} + +// Static index for the monitoring website. +var htmlIndex = template.Must( + template.New("index").Funcs(tmplFuncs).Parse( + `<!DOCTYPE html> +<html> + +<head> +<meta name="viewport" content="width=device-width, initial-scale=1"> +<title>dnss @{{.Hostname}}</title> +<style type="text/css"> + body { + font-family: sans-serif; + } + @media (prefers-color-scheme: dark) { + body { + background: #121212; + color: #c9d1d9; + } + a { color: #44b4ec; } + } +</style> +</head> + +<body> + <h1>dnss @{{.Hostname}}</h1> + + version {{.Version}}<br> + source date {{.SourceDate.Format "2006-01-02 15:04:05 -0700"}}<br> + built with: {{.GoVersion}}<p> + + started {{.StartTime.Format "Mon, 2006-01-02 15:04:05 -0700"}}<br> + up for {{.StartTime | since | roundDuration}}<br> + os hostname <i>{{.Hostname}}</i><br> + <p> + + <ul> + <li><a href="/debug/traces">traces</a> + <li><a href="/debug/dnsserver/cache/dump">cache dump</a> + <li><a href="/debug/pprof">pprof</a> + <small><a href="https://golang.org/pkg/net/http/pprof/"> + (ref)</a></small> + <ul> + <li><a href="/debug/pprof/goroutine?debug=1">goroutines</a> + </ul> + <li><a href="/debug/flags">flags</a> + <li><a href="/debug/vars">public variables</a> + </ul> + + <details> + <summary>flags</summary> + <ul> + {{range .FlagsSet}}<li><tt>{{.}}</tt>{{end}} + {{range .FlagsUnset}}<li><i><tt>{{.}}</tt></i>{{end}} + </ul> + </details> + <p> + + <details> + <summary>env</summary> + <ul> {{range .Env}}<li><tt>{{.}}</tt>{{end}} </ul> + </details> + +</body> +</html> +`)) + +func flagsLists() (set, unset []string) { + visited := make(map[string]bool) + // Print set flags first, then the rest. + flag.Visit(func(f *flag.Flag) { + set = append(set, fmt.Sprintf("-%s=%s", f.Name, f.Value.String())) + visited[f.Name] = true + }) + + flag.VisitAll(func(f *flag.Flag) { + if visited[f.Name] { + return + } + + unset = append(unset, fmt.Sprintf("-%s=%s", f.Name, f.Value.String())) + }) + + return set, unset +} diff --git a/tests/external.sh b/tests/external.sh index 37d1c1f..7cb4e25 100755 --- a/tests/external.sh +++ b/tests/external.sh @@ -144,13 +144,13 @@ mv .dnss.log .dnss.http.log wait_until_ready tcp 1999 -echo "## Checking /debug/flags" -if ! get "http://localhost:1900/debug/flags"; then - echo "Failed to get /debug/flags" +echo "## Checking monitoring server /" +if ! get "http://localhost:1900/"; then + echo "Failed to get /" exit 1 fi if ! grep -q "insecure_http_server=true" .wget.out; then - echo "/debug/flags did not contain expected flags (see .wget.out)" + echo "/ did not contain expected flags (see .wget.out)" exit 1 fi