git » chasquid » commit fa651e7

dovecot: Retry auto-detect until we find a usable socket pair

author Alberto Bertogli
2021-05-23 23:22:19 UTC
committer Alberto Bertogli
2021-05-24 09:21:33 UTC
parent 84e6c066fab5afd6b512cff321f064c7e94f4b12

dovecot: Retry auto-detect until we find a usable socket pair

Currently, chasquid attempts to auto-detect dovecot sockets when
starting up (if needed). If autodetection fails, chasquid emits an
error, continues serving, and never tries again.

This can be problematic if chasquid starts up before dovecot, and at the
time the dovecot sockets are not present (e.g. after a reboot). In that
case, chasquid will not use dovecot for authentication even after
dovecot has started.

This patch changes the autodetect logic, by doing autodetection at
startup and on each request, until we find a working pair of sockets.
Once we do, they're used consistently.

That way, if dovecot is not ready when chasquid starts, it's not a
problem and chasquid will start using dovecot once it becomes available.

Thanks to Thor77 (thor77@thor77.org) for reporting and helping
troubleshoot this issue.

chasquid.go +5 -11
docs/knownissues.md +22 -0
internal/dovecot/dovecot.go +63 -51
internal/dovecot/dovecot_test.go +56 -46

diff --git a/chasquid.go b/chasquid.go
index 3c23487..ccafeb7 100644
--- a/chasquid.go
+++ b/chasquid.go
@@ -281,18 +281,12 @@ func loadDomain(name, dir string, s *smtpsrv.Server) {
 }
 
 func loadDovecot(s *smtpsrv.Server, userdb, client string) {
-	a := dovecot.Autodetect(userdb, client)
-	if a == nil {
-		log.Errorf("Dovecot autodetection failed, no dovecot fallback")
-		return
-	}
+	a := dovecot.NewAuth(userdb, client)
+	s.SetAuthFallback(a)
+	log.Infof("Fallback authenticator: %v", a)
 
-	if a != nil {
-		s.SetAuthFallback(a)
-		log.Infof("Fallback authenticator: %v", a)
-		if err := a.Check(); err != nil {
-			log.Errorf("Failed dovecot authenticator check: %v", err)
-		}
+	if err := a.Check(); err != nil {
+		log.Errorf("Warning: Dovecot auth is not responding: %v", err)
 	}
 }
 
diff --git a/docs/knownissues.md b/docs/knownissues.md
index 37dc503..4544837 100644
--- a/docs/knownissues.md
+++ b/docs/knownissues.md
@@ -9,6 +9,28 @@ Entries are eventually be purged once their affected versions become uncommon,
 to prevent confusion.
 
 
+## Dovecot auth occasionally not functional after a reboot (0.04 to 1.6)
+
+After a reboot, if chasquid starts *before* dovecot, it's possible that
+chasquid fails to autodetect the dovecot addresses, and the dovecot
+authentication will not be functional until chasquid is restarted.
+
+This condition can be identified by seeing
+`Dovecot autodetection failed, no dovecot fallback` in the chasquid logs, at
+start-up time.
+
+As a workaround, you can create the following systemd dropin file at
+`/etc/systemd/system/chasquid.service.d/after-dovecot.conf`, to make chasquid
+be started *after* dovecot:
+
+```
+[Unit]
+After=dovecot.service
+```
+
+The issue is fixed in 1.7.
+
+
 ## `dkimsign` causes parsing errors in post-data hook (0.07 to 1.5)
 
 The default post-data hook in versions 0.07 to 1.5 has a bug where if the
diff --git a/internal/dovecot/dovecot.go b/internal/dovecot/dovecot.go
index ea6cf89..bb4836d 100644
--- a/internal/dovecot/dovecot.go
+++ b/internal/dovecot/dovecot.go
@@ -17,6 +17,7 @@ import (
 	"net/textproto"
 	"os"
 	"strings"
+	"sync"
 	"time"
 	"unicode"
 )
@@ -27,6 +28,9 @@ const DefaultTimeout = 5 * time.Second
 
 var (
 	errUsernameNotSafe = errors.New("username not safe (contains spaces)")
+	errFailedToConnect = errors.New("failed to connect to dovecot")
+	errNoUserdbSocket  = errors.New("unable to find userdb socket")
+	errNoClientSocket  = errors.New("unable to find client socket")
 )
 
 var defaultUserdbPaths = []string{
@@ -41,8 +45,11 @@ var defaultClientPaths = []string{
 
 // Auth represents a particular Dovecot auth service to use.
 type Auth struct {
-	userdbAddr string
-	clientAddr string
+	addr struct {
+		mu     *sync.Mutex
+		userdb string
+		client string
+	}
 
 	// Timeout for connection and I/O operations (applies on each call).
 	// Set to DefaultTimeout by NewAuth.
@@ -53,30 +60,30 @@ type Auth struct {
 // takes the addresses of userdb and client sockets (usually paths as
 // configured in dovecot).
 func NewAuth(userdb, client string) *Auth {
-	return &Auth{
-		userdbAddr: userdb,
-		clientAddr: client,
-		Timeout:    DefaultTimeout,
-	}
+	a := &Auth{}
+	a.addr.mu = &sync.Mutex{}
+	a.addr.userdb = userdb
+	a.addr.client = client
+	a.Timeout = DefaultTimeout
+	return a
 }
 
 // String representation of this Auth, for human consumption.
 func (a *Auth) String() string {
-	return fmt.Sprintf("DovecotAuth(%q, %q)", a.userdbAddr, a.clientAddr)
+	a.addr.mu.Lock()
+	defer a.addr.mu.Unlock()
+	return fmt.Sprintf("DovecotAuth(%q, %q)", a.addr.userdb, a.addr.client)
 }
 
-// Check to see if this auth is valid (but may not be working).
+// Check to see if this auth is functional.
 func (a *Auth) Check() error {
-	// We intentionally don't connect or complete any handshakes because
-	// dovecot may not be up yet, even thought it may be configured properly.
-	// Just check that the addresses are valid sockets.
-	if !isUnixSocket(a.userdbAddr) {
-		return fmt.Errorf("userdb is not an unix socket")
+	u, c, err := a.getAddrs()
+	if err != nil {
+		return err
 	}
-	if !isUnixSocket(a.clientAddr) {
-		return fmt.Errorf("client is not an unix socket")
+	if !(a.canDial(u) && a.canDial(c)) {
+		return errFailedToConnect
 	}
-
 	return nil
 }
 
@@ -86,7 +93,12 @@ func (a *Auth) Exists(user string) (bool, error) {
 		return false, errUsernameNotSafe
 	}
 
-	conn, err := a.dial("unix", a.userdbAddr)
+	userdbAddr, _, err := a.getAddrs()
+	if err != nil {
+		return false, err
+	}
+
+	conn, err := a.dial("unix", userdbAddr)
 	if err != nil {
 		return false, err
 	}
@@ -134,7 +146,12 @@ func (a *Auth) Authenticate(user, passwd string) (bool, error) {
 		return false, errUsernameNotSafe
 	}
 
-	conn, err := a.dial("unix", a.clientAddr)
+	_, clientAddr, err := a.getAddrs()
+	if err != nil {
+		return false, err
+	}
+
+	conn, err := a.dial("unix", clientAddr)
 	if err != nil {
 		return false, err
 	}
@@ -233,48 +250,43 @@ func isUsernameSafe(user string) bool {
 	return true
 }
 
-// Autodetect where the dovecot authentication paths are, and return an Auth
-// instance for them. If any of userdb or client are != "", they will be used
-// and not autodetected.
-func Autodetect(userdb, client string) *Auth {
-	// If both are given, no need to autodtect.
-	if userdb != "" && client != "" {
-		return NewAuth(userdb, client)
-	}
-
-	var userdbs, clients []string
-	if userdb != "" {
-		userdbs = append(userdbs, userdb)
-	}
-	if client != "" {
-		clients = append(clients, client)
-	}
-
-	if len(userdbs) == 0 {
-		userdbs = append(userdbs, defaultUserdbPaths...)
-	}
+// getAddrs returns the addresses to the userdb and client sockets.
+func (a *Auth) getAddrs() (string, string, error) {
+	a.addr.mu.Lock()
+	defer a.addr.mu.Unlock()
 
-	if len(clients) == 0 {
-		clients = append(clients, defaultClientPaths...)
+	if a.addr.userdb == "" {
+		for _, u := range defaultUserdbPaths {
+			if a.canDial(u) {
+				a.addr.userdb = u
+				break
+			}
+		}
+		if a.addr.userdb == "" {
+			return "", "", errNoUserdbSocket
+		}
 	}
 
-	// Go through each possiblity, return the first auth that works.
-	for _, u := range userdbs {
-		for _, c := range clients {
-			a := NewAuth(u, c)
-			if a.Check() == nil {
-				return a
+	if a.addr.client == "" {
+		for _, c := range defaultClientPaths {
+			if a.canDial(c) {
+				a.addr.client = c
+				break
 			}
 		}
+		if a.addr.client == "" {
+			return "", "", errNoClientSocket
+		}
 	}
 
-	return nil
+	return a.addr.userdb, a.addr.client, nil
 }
 
-func isUnixSocket(path string) bool {
-	fi, err := os.Stat(path)
+func (a *Auth) canDial(path string) bool {
+	conn, err := a.dial("unix", path)
 	if err != nil {
 		return false
 	}
-	return fi.Mode()&os.ModeSocket != 0
+	conn.Close()
+	return true
 }
diff --git a/internal/dovecot/dovecot_test.go b/internal/dovecot/dovecot_test.go
index 2e71dd5..d57a80c 100644
--- a/internal/dovecot/dovecot_test.go
+++ b/internal/dovecot/dovecot_test.go
@@ -31,13 +31,11 @@ func TestUsernameNotSafe(t *testing.T) {
 }
 
 func TestAutodetect(t *testing.T) {
-	// If we give both parameters to autodetect, it should return a new Auth
-	// using them, even if they're not valid.
-	a := Autodetect("uDoesNotExist", "cDoesNotExist")
-	if a == nil {
-		t.Errorf("Autodetection with two params failed")
-	} else if *a != *NewAuth("uDoesNotExist", "cDoesNotExist") {
-		t.Errorf("Autodetection with two params: got %v", a)
+	// Check on a pair that does not exist.
+	a := NewAuth("uDoesNotExist", "cDoesNotExist")
+	err := a.Check()
+	if err != errFailedToConnect {
+		t.Errorf("Expected failure to connect, got %v", err)
 	}
 
 	// We override the default paths, so we can point the "defaults" to our
@@ -46,9 +44,18 @@ func TestAutodetect(t *testing.T) {
 	defaultClientPaths = []string{"/dev/null"}
 
 	// Autodetect failure: no valid sockets on the list.
-	a = Autodetect("", "")
-	if a != nil {
-		t.Errorf("Autodetection worked with only /dev/null, got %v", a)
+	a = NewAuth("", "")
+	err = a.Check()
+	if err != errNoUserdbSocket {
+		t.Errorf("Expected failure to find userdb socket, got %v", err)
+	}
+	ok, err := a.Exists("user")
+	if ok != false || err != errNoUserdbSocket {
+		t.Errorf("Expected {false, no userdb socket}, got {%v, %v}", ok, err)
+	}
+	ok, err = a.Authenticate("user", "password")
+	if ok != false || err != errNoUserdbSocket {
+		t.Errorf("Expected {false, no userdb socket}, got {%v, %v}", ok, err)
 	}
 
 	// Create a temporary directory, and two sockets on it.
@@ -61,45 +68,54 @@ func TestAutodetect(t *testing.T) {
 	uL := mustListen(t, userdb)
 	cL := mustListen(t, client)
 
-	defaultUserdbPaths = append(defaultUserdbPaths, userdb)
-	defaultClientPaths = append(defaultClientPaths, client)
+	// Autodetect finds the user, but fails to find the client.
+	defaultUserdbPaths = []string{"/dev/null", userdb}
+	defaultClientPaths = []string{"/dev/null"}
+	a = NewAuth("", "")
+	err = a.Check()
+	if err != errNoClientSocket {
+		t.Errorf("Expected failure to find userdb socket, got %v", err)
+	}
 
-	// Autodetect should work fine against open sockets.
-	a = Autodetect("", "")
-	if a == nil {
-		t.Errorf("Autodetection failed (open sockets)")
-	} else if a.userdbAddr != userdb || a.clientAddr != client {
+	// Autodetect should pick the suggestions passed as parameters (if
+	// possible).
+	defaultUserdbPaths = []string{"/dev/null"}
+	defaultClientPaths = []string{"/dev/null", client}
+	a = NewAuth(userdb, "")
+	err = a.Check()
+	if err != nil {
+		t.Errorf("Expected successful check, got %v", err)
+	}
+	if a.addr.userdb != userdb || a.addr.client != client {
 		t.Errorf("Expected autodetect to pick {%q, %q}, but got {%q, %q}",
-			userdb, client, a.userdbAddr, a.clientAddr)
+			userdb, client, a.addr.userdb, a.addr.client)
 	}
 
-	// Close the two sockets, and re-do the test from above: Autodetect should
-	// work fine against closed sockets.
+	// Successful autodetection against open sockets.
+	defaultUserdbPaths = append(defaultUserdbPaths, userdb)
+	defaultClientPaths = append(defaultClientPaths, client)
+	a = NewAuth("", "")
+	err = a.Check()
+	if err != nil {
+		t.Errorf("Expected successful check, got %v", err)
+	}
+
+	// Close the two sockets, and re-do the check: now we have pinned the
+	// paths, and check should fail to connect.
 	// We need to tell Go to keep the socket files around explicitly, as the
-	// default is to delete them since they were creeated by the net library.
+	// default is to delete them since they were created by the net library.
 	uL.SetUnlinkOnClose(false)
 	uL.Close()
-	cL.SetUnlinkOnClose(false)
-	cL.Close()
-
-	a = Autodetect("", "")
-	if a == nil {
-		t.Errorf("Autodetection failed (closed sockets)")
-	} else if a.userdbAddr != userdb || a.clientAddr != client {
-		t.Errorf("Expected autodetect to pick {%q, %q}, but got {%q, %q}",
-			userdb, client, a.userdbAddr, a.clientAddr)
+	err = a.Check()
+	if err != errFailedToConnect {
+		t.Errorf("Expected failed to connect, got %v", err)
 	}
 
-	// Autodetect should pick the suggestions passed as parameters (if
-	// possible).
-	defaultUserdbPaths = []string{"/dev/null"}
-	defaultClientPaths = []string{"/dev/null", client}
-	a = Autodetect(userdb, "")
-	if a == nil {
-		t.Errorf("Autodetection failed (single parameter)")
-	} else if a.userdbAddr != userdb || a.clientAddr != client {
-		t.Errorf("Expected autodetect to pick {%q, %q}, but got {%q, %q}",
-			userdb, client, a.userdbAddr, a.clientAddr)
+	cL.SetUnlinkOnClose(false)
+	cL.Close()
+	err = a.Check()
+	if err != errFailedToConnect {
+		t.Errorf("Expected failed to connect, got %v", err)
 	}
 }
 
@@ -124,9 +140,3 @@ func mustListen(t *testing.T, path string) *net.UnixListener {
 
 	return l
 }
-
-func TestNotASocket(t *testing.T) {
-	if isUnixSocket("/doesnotexist") {
-		t.Errorf("isUnixSocket(/doesnotexist) returned true")
-	}
-}