author | Alberto Bertogli
<albertito@blitiri.com.ar> 2016-10-08 00:27:13 UTC |
committer | Alberto Bertogli
<albertito@blitiri.com.ar> 2016-10-09 23:51:05 UTC |
parent | 2b801a84d19adbed3007d4f8c19f9df43f06dea6 |
chasquid.go | +99 | -25 |
internal/courier/procmail.go | +3 | -2 |
internal/courier/smtp.go | +8 | -7 |
internal/queue/queue.go | +44 | -28 |
internal/trace/trace.go | +23 | -8 |
diff --git a/chasquid.go b/chasquid.go index 2bc34ff..a7c80c1 100644 --- a/chasquid.go +++ b/chasquid.go @@ -9,7 +9,6 @@ import ( "io/ioutil" "math/rand" "net" - "net/http" "net/mail" "net/textproto" "os" @@ -32,6 +31,7 @@ import ( "blitiri.com.ar/go/chasquid/internal/trace" "blitiri.com.ar/go/chasquid/internal/userdb" + "net/http" _ "net/http/pprof" "github.com/golang/glog" @@ -67,9 +67,7 @@ func main() { os.Chdir(*configDir) if conf.MonitoringAddress != "" { - glog.Infof("Monitoring HTTP server listening on %s", - conf.MonitoringAddress) - go http.ListenAndServe(conf.MonitoringAddress, nil) + launchMonitoringServer(conf.MonitoringAddress) } s := NewServer() @@ -301,6 +299,11 @@ func (s *Server) InitQueue(path string, localC, remoteC courier.Courier) { glog.Fatalf("Error loading queue: %v", err) } s.queue = q + + http.HandleFunc("/debug/queue", + func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(q.DumpString())) + }) } // periodicallyReload some of the server's information, such as aliases and @@ -395,6 +398,9 @@ type Conn struct { mode SocketMode tlsConnState *tls.ConnectionState + // Tracer to use. + tr *trace.Trace + // System configuration. config *config.Config @@ -442,9 +448,8 @@ type Conn struct { func (c *Conn) Handle() { defer c.netconn.Close() - tr := trace.New("SMTP", "Connection") - defer tr.Finish() - tr.LazyPrintf("RemoteAddr: %s", c.netconn.RemoteAddr()) + c.tr = trace.New("SMTP.Conn", c.netconn.RemoteAddr().String()) + defer c.tr.Finish() c.tc.PrintfLine("220 %s ESMTP chasquid", c.hostname) @@ -454,8 +459,8 @@ func (c *Conn) Handle() { loop: for { if time.Since(c.deadline) > 0 { - tr.LazyPrintf("connection deadline exceeded") err = fmt.Errorf("connection deadline exceeded") + c.tr.Error(err) break } @@ -467,7 +472,7 @@ loop: break } - tr.LazyPrintf("-> %s %s", cmd, params) + c.tr.Debugf("-> %s %s", cmd, params) var code int var msg string @@ -493,11 +498,11 @@ loop: code, msg = c.RCPT(params) case "DATA": // DATA handles the whole sequence. - code, msg = c.DATA(params, tr) + code, msg = c.DATA(params) case "STARTTLS": - code, msg = c.STARTTLS(params, tr) + code, msg = c.STARTTLS(params) case "AUTH": - code, msg = c.AUTH(params, tr) + code, msg = c.AUTH(params) case "QUIT": c.writeResponse(221, "Be seeing you...") break loop @@ -507,7 +512,12 @@ loop: } if code > 0 { - tr.LazyPrintf("<- %d %s", code, msg) + c.tr.Debugf("<- %d %s", code, msg) + + // Be verbose about errors, to help troubleshooting. + if code >= 400 { + c.tr.Errorf("%s failed: %d %s", cmd, code, msg) + } err = c.writeResponse(code, msg) if err != nil { @@ -517,8 +527,7 @@ loop: } if err != nil { - tr.LazyPrintf("exiting with error: %v", err) - tr.SetError() + c.tr.Errorf("exiting with error: %v", err) } } @@ -624,6 +633,7 @@ func (c *Conn) MAIL(params string) (code int, msg string) { if tcp, ok := c.netconn.RemoteAddr().(*net.TCPAddr); ok { c.spfResult, c.spfError = spf.CheckHost( tcp.IP, envelope.DomainOf(e.Address)) + c.tr.Debugf("SPF %v (%v)", c.spfResult, c.spfError) // https://tools.ietf.org/html/rfc7208#section-8 // We opt not to fail on errors, to avoid accidents to prevent // delivery. @@ -688,7 +698,7 @@ func (c *Conn) RCPT(params string) (code int, msg string) { return 250, "You have an eerie feeling..." } -func (c *Conn) DATA(params string, tr *trace.Trace) (code int, msg string) { +func (c *Conn) DATA(params string) (code int, msg string) { if c.mailFrom == "" { return 503, "sender not yet given" } @@ -703,7 +713,7 @@ func (c *Conn) DATA(params string, tr *trace.Trace) (code int, msg string) { return 554, fmt.Sprintf("error writing DATA response: %v", err) } - tr.LazyPrintf("<- 354 You experience a strange sense of peace") + c.tr.Debugf("<- 354 You experience a strange sense of peace") // Increase the deadline for the data transfer to the connection-level // one, we don't want the command timeout to interfere. @@ -715,7 +725,7 @@ func (c *Conn) DATA(params string, tr *trace.Trace) (code int, msg string) { return 554, fmt.Sprintf("error reading DATA: %v", err) } - tr.LazyPrintf("-> ... %d bytes of data", len(c.data)) + c.tr.Debugf("-> ... %d bytes of data", len(c.data)) c.addReceivedHeader() @@ -723,12 +733,10 @@ func (c *Conn) DATA(params string, tr *trace.Trace) (code int, msg string) { // individual deliveries fail, we report via email. msgID, err := c.queue.Put(c.hostname, c.mailFrom, c.rcptTo, c.data) if err != nil { - tr.LazyPrintf(" error queueing: %v", err) - tr.SetError() return 554, fmt.Sprintf("Failed to enqueue message: %v", err) } - tr.LazyPrintf(" ... queued: %q", msgID) + c.tr.Printf("Queued from %s to %s - %s", c.mailFrom, c.rcptTo, msgID) // It is very important that we reset the envelope before returning, // so clients can send other emails right away without needing to RSET. @@ -779,7 +787,7 @@ func (c *Conn) addReceivedHeader() { } } -func (c *Conn) STARTTLS(params string, tr *trace.Trace) (code int, msg string) { +func (c *Conn) STARTTLS(params string) (code int, msg string) { if c.onTLS { return 503, "You are already wearing that!" } @@ -789,7 +797,7 @@ func (c *Conn) STARTTLS(params string, tr *trace.Trace) (code int, msg string) { return 554, fmt.Sprintf("error writing STARTTLS response: %v", err) } - tr.LazyPrintf("<- 220 You experience a strange sense of peace") + c.tr.Debugf("<- 220 You experience a strange sense of peace") server := tls.Server(c.netconn, c.tlsConfig) err = server.Handshake() @@ -797,7 +805,7 @@ func (c *Conn) STARTTLS(params string, tr *trace.Trace) (code int, msg string) { return 554, fmt.Sprintf("error in TLS handshake: %v", err) } - tr.LazyPrintf("<> ... jump to TLS was successful") + c.tr.Debugf("<> ... jump to TLS was successful") // Override the connections. We don't need the older ones anymore. c.netconn = server @@ -823,7 +831,7 @@ func (c *Conn) STARTTLS(params string, tr *trace.Trace) (code int, msg string) { return 0, "" } -func (c *Conn) AUTH(params string, tr *trace.Trace) (code int, msg string) { +func (c *Conn) AUTH(params string) (code int, msg string) { if !c.onTLS { return 503, "You feel vulnerable" } @@ -966,3 +974,69 @@ func writeResponse(w io.Writer, code int, msg string) error { return nil } + +func launchMonitoringServer(addr string) { + glog.Infof("Monitoring HTTP server listening on %s", addr) + + http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/" { + http.NotFound(w, r) + return + } + w.Write([]byte(monitoringHTMLIndex)) + }) + + flags := dumpFlags() + http.HandleFunc("/debug/flags", func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(flags)) + }) + + go http.ListenAndServe(addr, nil) +} + +// Static index for the monitoring website. +const monitoringHTMLIndex = `<!DOCTYPE html> +<html> + <head> + <title>chasquid monitoring</title> + </head> + <body> + <h1>chasquid monitoring</h1> + <ul> + <li><a href="/debug/queue">queue</a> + <li><a href="/debug/requests">requests</a> + <small><a href="https://godoc.org/golang.org/x/net/trace"> + (ref)</a></small> + <li><a href="/debug/flags">flags</a> + <li><a href="/debug/vars">public variables</a> + <li><a href="/debug/pprof">pprof</a> + <small><a href="https://golang.org/pkg/net/http/pprof/"> + (ref)</a></small> + <ul> + <li><a href="/debug/pprof/goroutine?debug=1">goroutines</a> + </ul> + </ul> + </body> +</html> +` + +// dumpFlags to a string, for troubleshooting purposes. +func dumpFlags() string { + s := "" + visited := make(map[string]bool) + + // Print set flags first, then the rest. + flag.Visit(func(f *flag.Flag) { + s += fmt.Sprintf("-%s=%s\n", f.Name, f.Value.String()) + visited[f.Name] = true + }) + + s += "\n" + flag.VisitAll(func(f *flag.Flag) { + if !visited[f.Name] { + s += fmt.Sprintf("-%s=%s\n", f.Name, f.Value.String()) + } + }) + + return s +} diff --git a/internal/courier/procmail.go b/internal/courier/procmail.go index c043165..a0d9188 100644 --- a/internal/courier/procmail.go +++ b/internal/courier/procmail.go @@ -26,14 +26,14 @@ type Procmail struct { } func (p *Procmail) Deliver(from string, to string, data []byte) (error, bool) { - tr := trace.New("Procmail", "Deliver") + tr := trace.New("Procmail.Courier", to) defer tr.Finish() // Sanitize, just in case. from = sanitizeForProcmail(from) to = sanitizeForProcmail(to) - tr.LazyPrintf("%s -> %s", from, to) + tr.Debugf("%s -> %s", from, to) // Prepare the command, replacing the necessary arguments. replacer := strings.NewReplacer( @@ -97,6 +97,7 @@ func (p *Procmail) Deliver(from string, to string, data []byte) (error, bool) { return err, permanent } + tr.Debugf("delivered") return nil, false } diff --git a/internal/courier/smtp.go b/internal/courier/smtp.go index 43361c6..71fcb11 100644 --- a/internal/courier/smtp.go +++ b/internal/courier/smtp.go @@ -34,9 +34,9 @@ type SMTP struct { } func (s *SMTP) Deliver(from string, to string, data []byte) (error, bool) { - tr := trace.New("SMTP", "Deliver") + tr := trace.New("SMTP.Courier", to) defer tr.Finish() - tr.LazyPrintf("%s -> %s", from, to) + tr.Debugf("%s -> %s", from, to) mx, err := lookupMX(envelope.DomainOf(to)) if err != nil { @@ -46,7 +46,7 @@ func (s *SMTP) Deliver(from string, to string, data []byte) (error, bool) { // have to make sure we try hard enough on the lookup above. return tr.Errorf("Could not find mail server: %v", err), true } - tr.LazyPrintf("MX: %s", mx) + tr.Debugf("MX: %s", mx) // Do we use insecure TLS? // Set as fallback when retrying. @@ -94,17 +94,17 @@ retry: } insecure = true - tr.LazyPrintf("TLS error, retrying insecurely") + tr.Debugf("TLS error, retrying insecurely") goto retry } if config.InsecureSkipVerify { - tr.LazyPrintf("Insecure - self-signed certificate") + tr.Debugf("Insecure - using TLS, but cert does not match %s", mx) } else { - tr.LazyPrintf("Secure - using TLS") + tr.Debugf("Secure - using TLS") } } else { - tr.LazyPrintf("Insecure - not using TLS") + tr.Debugf("Insecure - NOT using TLS") } // c.Mail will add the <> for us when the address is empty. @@ -130,6 +130,7 @@ retry: } c.Quit() + tr.Debugf("done") return nil, false } diff --git a/internal/queue/queue.go b/internal/queue/queue.go index 1e531c6..363ff78 100644 --- a/internal/queue/queue.go +++ b/internal/queue/queue.go @@ -24,12 +24,12 @@ import ( "blitiri.com.ar/go/chasquid/internal/envelope" "blitiri.com.ar/go/chasquid/internal/protoio" "blitiri.com.ar/go/chasquid/internal/set" + "blitiri.com.ar/go/chasquid/internal/trace" "github.com/golang/glog" "github.com/golang/protobuf/ptypes" "github.com/golang/protobuf/ptypes/timestamp" "golang.org/x/net/idna" - "golang.org/x/net/trace" ) const ( @@ -198,8 +198,6 @@ func (q *Queue) Put(hostname, from string, to []string, data []byte) (string, er q.q[item.ID] = item q.mu.Unlock() - glog.Infof("%s accepted from %q to %v", item.ID, from, to) - // Begin to send it right away. go item.SendLoop(q) @@ -219,8 +217,32 @@ func (q *Queue) Remove(id string) { q.mu.Unlock() } -// TODO: http handler for dumping the queue. -// Register it in main(). +// DumpString returns a human-readable string with the current queue. +// Useful for debugging purposes. +func (q *Queue) DumpString() string { + q.mu.RLock() + defer q.mu.RUnlock() + s := fmt.Sprintf("# Queue status\n\n") + s += fmt.Sprintf("date: %v\n", time.Now()) + s += fmt.Sprintf("length: %d\n\n", len(q.q)) + + for id, item := range q.q { + s += fmt.Sprintf("## Item %s\n", id) + item.Lock() + s += fmt.Sprintf("created at: %s\n", item.CreatedAt) + s += fmt.Sprintf("from: %s\n", item.From) + s += fmt.Sprintf("to: %s\n", item.To) + for _, rcpt := range item.Rcpt { + s += fmt.Sprintf("%s %s (%s)\n", rcpt.Status, rcpt.Address, rcpt.Type) + s += fmt.Sprintf(" original address: %s\n", rcpt.OriginalAddress) + s += fmt.Sprintf(" last failure: %q\n", rcpt.LastFailureMessage) + } + item.Unlock() + s += fmt.Sprintf("\n") + } + + return s +} // An item in the queue. type Item struct { @@ -263,9 +285,9 @@ func (item *Item) WriteTo(dir string) error { } func (item *Item) SendLoop(q *Queue) { - tr := trace.New("Queue", item.ID) + tr := trace.New("Queue.SendLoop", item.ID) defer tr.Finish() - tr.LazyPrintf("from: %s", item.From) + tr.Printf("from %s", item.From) var delay time.Duration for time.Since(item.CreatedAt) < giveUpAfter { @@ -284,23 +306,19 @@ func (item *Item) SendLoop(q *Queue) { go func(rcpt *Recipient, oldStatus Recipient_Status) { defer wg.Done() to := rcpt.Address - tr.LazyPrintf("%s sending", to) + tr.Debugf("%s sending", to) err, permanent := item.deliver(q, rcpt) if err != nil { if permanent { - tr.LazyPrintf("permanent error: %v", err) - glog.Infof("%s -> %q permanent fail: %v", item.ID, to, err) + tr.Errorf("%s permanent error: %v", to, err) status = Recipient_FAILED } else { - tr.LazyPrintf("error: %v", err) - glog.Infof("%s -> %q fail: %v", item.ID, to, err) + tr.Printf("%s temporary error: %v", to, err) } } else { - tr.LazyPrintf("%s successful", to) - glog.Infof("%s -> %q sent", item.ID, to) - + tr.Printf("%s sent", to) status = Recipient_SENT } @@ -315,8 +333,7 @@ func (item *Item) SendLoop(q *Queue) { err = item.WriteTo(q.path) if err != nil { - tr.LazyPrintf("failed to write: %v", err) - glog.Errorf("%s failed to write: %v", item.ID, err) + tr.Errorf("failed to write: %v", err) } } }(rcpt, status) @@ -338,14 +355,11 @@ func (item *Item) SendLoop(q *Queue) { // that some of the messages have been delayed. delay = nextDelay(delay) - tr.LazyPrintf("waiting for %v", delay) - glog.Infof("%s waiting for %v", item.ID, delay) + tr.Printf("waiting for %v", delay) time.Sleep(delay) } // Completed to all recipients (some may not have succeeded). - tr.LazyPrintf("all done") - glog.Infof("%s all done", item.ID) failed := 0 for _, rcpt := range item.Rcpt { @@ -358,26 +372,28 @@ func (item *Item) SendLoop(q *Queue) { sendDSN(tr, q, item) } + tr.Printf("all done") q.Remove(item.ID) return } -func sendDSN(tr trace.Trace, q *Queue, item *Item) { - tr.LazyPrintf("sending DSN") +func sendDSN(tr *trace.Trace, q *Queue, item *Item) { + tr.Debugf("sending DSN") msg, err := deliveryStatusNotification(item) if err != nil { - tr.LazyPrintf("failed to build DSN: %v", err) - glog.Infof("%s: failed to build DSN: %v", item.ID, err) + tr.Errorf("failed to build DSN: %v", err) return } - _, err = q.Put(item.Hostname, "<>", []string{item.From}, msg) + id, err := q.Put(item.Hostname, "<>", []string{item.From}, msg) if err != nil { - tr.LazyPrintf("failed to queue DSN: %v", err) - glog.Infof("%s: failed to queue DSN: %v", item.ID, err) + tr.Errorf("failed to queue DSN: %v", err) + return } + + tr.Printf("queued DSN: %s", id) } // deliver the item to the given recipient, using the couriers from the queue. diff --git a/internal/trace/trace.go b/internal/trace/trace.go index 4314079..fa4130f 100644 --- a/internal/trace/trace.go +++ b/internal/trace/trace.go @@ -3,6 +3,7 @@ package trace import ( "fmt" + "strconv" "github.com/golang/glog" nettrace "golang.org/x/net/trace" @@ -18,16 +19,30 @@ func New(family, title string) *Trace { return &Trace{family, title, nettrace.New(family, title)} } -func (t *Trace) LazyPrintf(format string, a ...interface{}) { +func (t *Trace) Printf(format string, a ...interface{}) { t.t.LazyPrintf(format, a...) + if glog.V(0) { + msg := fmt.Sprintf("%s %s: %s", t.family, t.title, + quote(fmt.Sprintf(format, a...))) + glog.InfoDepth(2, msg) + } +} + +func (t *Trace) Debugf(format string, a ...interface{}) { if glog.V(2) { - msg := fmt.Sprintf("%p %s %s: %+q", t, t.family, t.title, - fmt.Sprintf(format, a...)) - glog.InfoDepth(1, msg) + t.t.LazyPrintf(format, a...) + msg := fmt.Sprintf("%s %s: %s", t.family, t.title, + quote(fmt.Sprintf(format, a...))) + glog.InfoDepth(2, msg) } } +func quote(s string) string { + qs := strconv.Quote(s) + return qs[1 : len(qs)-1] +} + func (t *Trace) SetError() { t.t.SetError() } @@ -37,8 +52,8 @@ func (t *Trace) Errorf(format string, a ...interface{}) error { t.t.SetError() t.t.LazyPrintf("error: %v", err) - if glog.V(2) { - msg := fmt.Sprintf("%p %s %s: error: %v", t, t.family, t.title, err) + if glog.V(0) { + msg := fmt.Sprintf("%s %s: error: %v", t.family, t.title, err) glog.InfoDepth(1, msg) } return err @@ -48,8 +63,8 @@ func (t *Trace) Error(err error) error { t.t.SetError() t.t.LazyPrintf("error: %v", err) - if glog.V(2) { - msg := fmt.Sprintf("%p %s %s: error: %v", t, t.family, t.title, err) + if glog.V(0) { + msg := fmt.Sprintf("%s %s: error: %v", t, t.family, t.title, err) glog.InfoDepth(1, msg) } return err