git » chasquid » commit 0bf5d9b

Distinguish between permanent and transient errors

author Alberto Bertogli
2016-09-24 01:02:55 UTC
committer Alberto Bertogli
2016-10-09 23:51:04 UTC
parent 0dc93d1ec6ddae83afce273bd6c70b84ec1da618

Distinguish between permanent and transient errors

This patch makes the queue and couriers distinguish between permanent and
transient errors when delivering mail to individual recipients.

Pipe delivery errors are always permanent.

Procmail delivery errors are almost always permanent, except if the command
exited with code 75, which is an indication of transient.

SMTP delivery errors are almost always transient, except if the DNS resolution
for the domain failed.

internal/courier/courier.go +3 -1
internal/courier/procmail.go +20 -7
internal/courier/procmail_test.go +35 -4
internal/courier/smtp.go +20 -11
internal/courier/smtp_test.go +2 -2
internal/queue/queue.go +18 -13
internal/queue/queue_test.go +9 -15
test/util/exitcode +4 -0

diff --git a/internal/courier/courier.go b/internal/courier/courier.go
index e42a5b3..f9a9a24 100644
--- a/internal/courier/courier.go
+++ b/internal/courier/courier.go
@@ -5,5 +5,7 @@ package courier
 // It is implemented by different couriers, for both local and remote
 // recipients.
 type Courier interface {
-	Deliver(from string, to string, data []byte) error
+	// Deliver mail to a recipient. Return the error (if any), and whether it
+	// is permanent (true) or transient (false).
+	Deliver(from string, to string, data []byte) (error, bool)
 }
diff --git a/internal/courier/procmail.go b/internal/courier/procmail.go
index f5a9067..0f54c8e 100644
--- a/internal/courier/procmail.go
+++ b/internal/courier/procmail.go
@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"os/exec"
 	"strings"
+	"syscall"
 	"time"
 	"unicode"
 
@@ -24,7 +25,7 @@ type Procmail struct {
 	Timeout time.Duration // Timeout for each invocation.
 }
 
-func (p *Procmail) Deliver(from string, to string, data []byte) error {
+func (p *Procmail) Deliver(from string, to string, data []byte) (error, bool) {
 	tr := trace.New("Procmail", "Deliver")
 	defer tr.Finish()
 
@@ -56,7 +57,7 @@ func (p *Procmail) Deliver(from string, to string, data []byte) error {
 
 	cmdStdin, err := cmd.StdinPipe()
 	if err != nil {
-		return tr.Errorf("StdinPipe: %v", err)
+		return tr.Errorf("StdinPipe: %v", err), true
 	}
 
 	output := &bytes.Buffer{}
@@ -65,12 +66,12 @@ func (p *Procmail) Deliver(from string, to string, data []byte) error {
 
 	err = cmd.Start()
 	if err != nil {
-		return tr.Errorf("Error starting procmail: %v", err)
+		return tr.Errorf("Error starting procmail: %v", err), true
 	}
 
 	_, err = bytes.NewBuffer(data).WriteTo(cmdStdin)
 	if err != nil {
-		return tr.Errorf("Error sending data to procmail: %v", err)
+		return tr.Errorf("Error sending data to procmail: %v", err), true
 	}
 
 	cmdStdin.Close()
@@ -78,12 +79,24 @@ func (p *Procmail) Deliver(from string, to string, data []byte) error {
 	err = cmd.Wait()
 
 	if ctx.Err() == context.DeadlineExceeded {
-		return tr.Error(errTimeout)
+		return tr.Error(errTimeout), false
 	}
+
 	if err != nil {
-		return tr.Errorf("Procmail failed: %v - %q", err, output.String())
+		// Determine if the error is permanent or not.
+		// Default to permanent, but error code 75 is transient by general
+		// convention (/usr/include/sysexits.h), and commonly relied upon.
+		permanent := true
+		if exiterr, ok := err.(*exec.ExitError); ok {
+			if status, ok := exiterr.Sys().(syscall.WaitStatus); ok {
+				permanent = status.ExitStatus() != 75
+			}
+		}
+		err = tr.Errorf("Procmail failed: %v - %q", err, output.String())
+		return err, permanent
 	}
-	return nil
+
+	return nil, false
 }
 
 // sanitizeForProcmail cleans the string, removing characters that could be
diff --git a/internal/courier/procmail_test.go b/internal/courier/procmail_test.go
index c711f9e..a6fc4ea 100644
--- a/internal/courier/procmail_test.go
+++ b/internal/courier/procmail_test.go
@@ -21,7 +21,7 @@ func TestProcmail(t *testing.T) {
 		Timeout: 1 * time.Minute,
 	}
 
-	err = p.Deliver("from@x", "to@local", []byte("data"))
+	err, _ = p.Deliver("from@x", "to@local", []byte("data"))
 	if err != nil {
 		t.Fatalf("Deliver: %v", err)
 	}
@@ -35,28 +35,59 @@ func TestProcmail(t *testing.T) {
 func TestProcmailTimeout(t *testing.T) {
 	p := Procmail{"/bin/sleep", []string{"1"}, 100 * time.Millisecond}
 
-	err := p.Deliver("from", "to@local", []byte("data"))
+	err, permanent := p.Deliver("from", "to@local", []byte("data"))
 	if err != errTimeout {
 		t.Errorf("Unexpected error: %v", err)
 	}
+	if permanent {
+		t.Errorf("expected transient, got permanent")
+	}
 }
 
 func TestProcmailBadCommandLine(t *testing.T) {
 	// Non-existent binary.
 	p := Procmail{"thisdoesnotexist", nil, 1 * time.Minute}
-	err := p.Deliver("from", "to", []byte("data"))
+	err, permanent := p.Deliver("from", "to", []byte("data"))
 	if err == nil {
 		t.Errorf("unexpected success for non-existent binary")
 	}
+	if !permanent {
+		t.Errorf("expected permanent, got transient")
+	}
 
 	// Incorrect arguments.
 	p = Procmail{"cat", []string{"--fail_unknown_option"}, 1 * time.Minute}
-	err = p.Deliver("from", "to", []byte("data"))
+	err, _ = p.Deliver("from", "to", []byte("data"))
 	if err == nil {
 		t.Errorf("unexpected success for incorrect arguments")
 	}
 }
 
+// Test that local delivery failures are considered permanent or not
+// according to the exit code.
+func TestExitCode(t *testing.T) {
+	cases := []struct {
+		cmd             string
+		args            []string
+		expectPermanent bool
+	}{
+		{"does/not/exist", nil, true},
+		{"../../test/util/exitcode", []string{"1"}, true},
+		{"../../test/util/exitcode", []string{"75"}, false},
+	}
+	for _, c := range cases {
+		p := &Procmail{c.cmd, c.args, 5 * time.Second}
+		err, permanent := p.Deliver("from", "to", []byte("data"))
+		if err == nil {
+			t.Errorf("%q: pipe delivery worked, expected failure", c.cmd)
+		}
+		if c.expectPermanent != permanent {
+			t.Errorf("%q: permanent expected=%v, got=%v",
+				c.cmd, c.expectPermanent, permanent)
+		}
+	}
+}
+
 func TestSanitize(t *testing.T) {
 	cases := []struct{ v, expected string }{
 		// These are the same.
diff --git a/internal/courier/smtp.go b/internal/courier/smtp.go
index cf2e199..7410f4d 100644
--- a/internal/courier/smtp.go
+++ b/internal/courier/smtp.go
@@ -35,14 +35,19 @@ var (
 type SMTP struct {
 }
 
-func (s *SMTP) Deliver(from string, to string, data []byte) error {
+func (s *SMTP) Deliver(from string, to string, data []byte) (error, bool) {
 	tr := trace.New("goingSMTP", "Deliver")
 	defer tr.Finish()
 	tr.LazyPrintf("%s  ->  %s", from, to)
 
+	// TODO: Fall back to A if MX is not available.
 	mx, err := lookupMX(envelope.DomainOf(to))
 	if err != nil {
-		return tr.Errorf("Could not find mail server: %v", err)
+		// Note this is considered a permanent error.
+		// This is in line with what other servers (Exim) do. However, the
+		// downside is that temporary DNS issues can affect delivery, so we
+		// have to make sure we try hard enough on the lookup above.
+		return tr.Errorf("Could not find mail server: %v", err), true
 	}
 	tr.LazyPrintf("MX: %s", mx)
 
@@ -53,13 +58,13 @@ func (s *SMTP) Deliver(from string, to string, data []byte) error {
 retry:
 	conn, err := net.DialTimeout("tcp", mx+":"+*smtpPort, smtpDialTimeout)
 	if err != nil {
-		return tr.Errorf("Could not dial: %v", err)
+		return tr.Errorf("Could not dial: %v", err), false
 	}
 	conn.SetDeadline(time.Now().Add(smtpTotalTimeout))
 
 	c, err := smtp.NewClient(conn, mx)
 	if err != nil {
-		return tr.Errorf("Error creating client: %v", err)
+		return tr.Errorf("Error creating client: %v", err), false
 	}
 
 	// TODO: Keep track of hosts and MXs that we've successfully done TLS
@@ -74,7 +79,7 @@ retry:
 			// Unfortunately, many servers use self-signed certs, so if we
 			// fail verification we just try again without validating.
 			if insecure {
-				return tr.Errorf("TLS error: %v", err)
+				return tr.Errorf("TLS error: %v", err), false
 			}
 
 			insecure = true
@@ -91,31 +96,35 @@ retry:
 		tr.LazyPrintf("Insecure - not using TLS")
 	}
 
+	// TODO: check if the errors we get back are transient or not.
+	// Go's smtp does not allow us to do this, so leave for when we do it
+	// ourselves.
+
 	if err = c.Mail(from); err != nil {
-		return tr.Errorf("MAIL %v", err)
+		return tr.Errorf("MAIL %v", err), false
 	}
 
 	if err = c.Rcpt(to); err != nil {
-		return tr.Errorf("RCPT TO %v", err)
+		return tr.Errorf("RCPT TO %v", err), false
 	}
 
 	w, err := c.Data()
 	if err != nil {
-		return tr.Errorf("DATA %v", err)
+		return tr.Errorf("DATA %v", err), false
 	}
 	_, err = w.Write(data)
 	if err != nil {
-		return tr.Errorf("DATA writing: %v", err)
+		return tr.Errorf("DATA writing: %v", err), false
 	}
 
 	err = w.Close()
 	if err != nil {
-		return tr.Errorf("DATA closing %v", err)
+		return tr.Errorf("DATA closing %v", err), false
 	}
 
 	c.Quit()
 
-	return nil
+	return nil, false
 }
 
 func lookupMX(domain string) (string, error) {
diff --git a/internal/courier/smtp_test.go b/internal/courier/smtp_test.go
index b90a7ba..9324e1a 100644
--- a/internal/courier/smtp_test.go
+++ b/internal/courier/smtp_test.go
@@ -73,7 +73,7 @@ func TestSMTP(t *testing.T) {
 	*smtpPort = port
 
 	s := &SMTP{}
-	err := s.Deliver("me@me", "to@to", []byte("data"))
+	err, _ := s.Deliver("me@me", "to@to", []byte("data"))
 	if err != nil {
 		t.Errorf("deliver failed: %v", err)
 	}
@@ -133,7 +133,7 @@ func TestSMTPErrors(t *testing.T) {
 		*smtpPort = port
 
 		s := &SMTP{}
-		err := s.Deliver("me@me", "to@to", []byte("data"))
+		err, _ := s.Deliver("me@me", "to@to", []byte("data"))
 		if err == nil {
 			t.Errorf("deliver not failed in case %q: %v", rs["_welcome"], err)
 		}
diff --git a/internal/queue/queue.go b/internal/queue/queue.go
index 09d68cc..3c21212 100644
--- a/internal/queue/queue.go
+++ b/internal/queue/queue.go
@@ -282,15 +282,17 @@ func (item *Item) SendLoop(q *Queue) {
 				to := rcpt.Address
 				tr.LazyPrintf("%s sending", to)
 
-				err := item.deliver(q, rcpt)
+				err, permanent := item.deliver(q, rcpt)
 
 				if err != nil {
-					// TODO: Local deliveries should not be retried, if they
-					// fail due to the user not existing.
-					// -> we need to know the users.
-					// Or maybe we can just not care?
-					tr.LazyPrintf("error: %v", err)
-					glog.Infof("%s  -> %q fail: %v", item.ID, to, err)
+					if permanent {
+						tr.LazyPrintf("permanent error: %v", err)
+						glog.Infof("%s  -> %q permanent fail: %v", item.ID, to, err)
+						status = Recipient_FAILED
+					} else {
+						tr.LazyPrintf("error: %v", err)
+						glog.Infof("%s  -> %q fail: %v", item.ID, to, err)
+					}
 				} else {
 					tr.LazyPrintf("%s successful", to)
 					glog.Infof("%s  -> %q sent", item.ID, to)
@@ -322,9 +324,9 @@ func (item *Item) SendLoop(q *Queue) {
 		}
 
 		if pending == 0 {
-			// Successfully sent to all recipients.
-			tr.LazyPrintf("all successful")
-			glog.Infof("%s all successful", item.ID)
+			// Completed to all recipients (some may not have succeeded).
+			tr.LazyPrintf("all done")
+			glog.Infof("%s all done", item.ID)
 
 			q.Remove(item.ID)
 			return
@@ -343,17 +345,20 @@ func (item *Item) SendLoop(q *Queue) {
 	// remove item from the queue, and remove from disk.
 }
 
-func (item *Item) deliver(q *Queue, rcpt *Recipient) error {
+// deliver the item to the given recipient, using the couriers from the queue.
+// Return an error (if any), and whether it is permanent or not.
+func (item *Item) deliver(q *Queue, rcpt *Recipient) (err error, permanent bool) {
 	if rcpt.Type == Recipient_PIPE {
 		c := strings.Fields(rcpt.Address)
 		if len(c) == 0 {
-			return fmt.Errorf("empty pipe")
+			return fmt.Errorf("empty pipe"), true
 		}
 		ctx, _ := context.WithDeadline(context.Background(),
 			time.Now().Add(30*time.Second))
 		cmd := exec.CommandContext(ctx, c[0], c[1:]...)
 		cmd.Stdin = bytes.NewReader(item.Data)
-		return cmd.Run()
+		return cmd.Run(), true
+
 	} else {
 		if envelope.DomainIn(rcpt.Address, q.localDomains) {
 			return q.localC.Deliver(item.From, rcpt.Address, item.Data)
diff --git a/internal/queue/queue_test.go b/internal/queue/queue_test.go
index 9c8f1ae..a750849 100644
--- a/internal/queue/queue_test.go
+++ b/internal/queue/queue_test.go
@@ -25,9 +25,9 @@ type deliverRequest struct {
 	data []byte
 }
 
-func (cc *ChanCourier) Deliver(from string, to string, data []byte) error {
+func (cc *ChanCourier) Deliver(from string, to string, data []byte) (error, bool) {
 	cc.requests <- deliverRequest{from, to, data}
-	return <-cc.results
+	return <-cc.results, false
 }
 func newChanCourier() *ChanCourier {
 	return &ChanCourier{
@@ -43,12 +43,12 @@ type TestCourier struct {
 	reqFor   map[string]*deliverRequest
 }
 
-func (tc *TestCourier) Deliver(from string, to string, data []byte) error {
+func (tc *TestCourier) Deliver(from string, to string, data []byte) (error, bool) {
 	defer tc.wg.Done()
 	dr := &deliverRequest{from, to, data}
 	tc.requests = append(tc.requests, dr)
 	tc.reqFor[to] = dr
-	return nil
+	return nil, false
 }
 
 func newTestCourier() *TestCourier {
@@ -138,14 +138,14 @@ func TestFullQueue(t *testing.T) {
 	q.Remove(id)
 }
 
-// Dumb courier, for when we don't care for the results.
+// Dumb courier, for when we just want to return directly.
 type DumbCourier struct{}
 
-func (c DumbCourier) Deliver(from string, to string, data []byte) error {
-	return nil
+func (c DumbCourier) Deliver(from string, to string, data []byte) (error, bool) {
+	return nil, false
 }
 
-var dumbCourier DumbCourier
+var dumbCourier = DumbCourier{}
 
 func TestAliases(t *testing.T) {
 	q := New("/tmp/queue_test", set.NewString("loco"), aliases.NewResolver(),
@@ -198,13 +198,7 @@ func TestPipes(t *testing.T) {
 		CreatedAt: time.Now(),
 	}
 
-	if err := item.deliver(q, item.Rcpt[0]); err != nil {
+	if err, _ := item.deliver(q, item.Rcpt[0]); err != nil {
 		t.Errorf("pipe delivery failed: %v", err)
 	}
-
-	// Make the command "false", should fail.
-	item.Rcpt[0].Address = "false"
-	if err := item.deliver(q, item.Rcpt[0]); err == nil {
-		t.Errorf("pipe delivery worked, expected failure")
-	}
 }
diff --git a/test/util/exitcode b/test/util/exitcode
new file mode 100755
index 0000000..ac62942
--- /dev/null
+++ b/test/util/exitcode
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+exit $1
+