git » chasquid » commit 08273ea

queue: Sync the files written on Put

author Alberto Bertogli
2025-10-18 11:10:24 UTC
committer Alberto Bertogli
2025-10-18 11:10:24 UTC
parent 7d56f1b4b410b38d60114dc4d83f5737c2abbd90

queue: Sync the files written on Put

When we put something in the queue and respond "250 ok" to the client,
that is taken as accepting the email.

As part of putting something in the queue, we write it to disk, but
today we don't do an fsync on that file.

That leaves a gap where a badly timed crash on some systems could lead
to the file being empty, causing us to lose an email that we accepted.

To elliminate (or drastically reduce on some filesystems) the chances of
that situation, we call fsync on the file that gets written when we put
something in the queue.

Thanks to nolanl@github for reporting this in
https://github.com/albertito/chasquid/issues/78.

internal/protoio/protoio.go +4 -4
internal/queue/queue.go +5 -1
internal/safeio/safeio.go +18 -0
internal/safeio/safeio_test.go +28 -0

diff --git a/internal/protoio/protoio.go b/internal/protoio/protoio.go
index b6bdd79..77df2f3 100644
--- a/internal/protoio/protoio.go
+++ b/internal/protoio/protoio.go
@@ -33,13 +33,13 @@ func ReadTextMessage(fname string, pb proto.Message) error {
 }
 
 // WriteMessage marshals pb and atomically writes it into fname.
-func WriteMessage(fname string, pb proto.Message, perm os.FileMode) error {
+func WriteMessage(fname string, pb proto.Message, perm os.FileMode, ops ...safeio.FileOp) error {
 	out, err := proto.Marshal(pb)
 	if err != nil {
 		return err
 	}
 
-	return safeio.WriteFile(fname, out, perm)
+	return safeio.WriteFile(fname, out, perm, ops...)
 }
 
 var textOpts = prototext.MarshalOptions{
@@ -48,12 +48,12 @@ var textOpts = prototext.MarshalOptions{
 
 // WriteTextMessage marshals pb in text format and atomically writes it into
 // fname.
-func WriteTextMessage(fname string, pb proto.Message, perm os.FileMode) error {
+func WriteTextMessage(fname string, pb proto.Message, perm os.FileMode, ops ...safeio.FileOp) error {
 	out, err := textOpts.Marshal(pb)
 	if err != nil {
 		return err
 	}
-	return safeio.WriteFile(fname, out, perm)
+	return safeio.WriteFile(fname, out, perm, ops...)
 }
 
 ///////////////////////////////////////////////////////////////
diff --git a/internal/queue/queue.go b/internal/queue/queue.go
index 0b177d4..202f6f4 100644
--- a/internal/queue/queue.go
+++ b/internal/queue/queue.go
@@ -25,6 +25,7 @@ import (
 	"blitiri.com.ar/go/chasquid/internal/expvarom"
 	"blitiri.com.ar/go/chasquid/internal/maillog"
 	"blitiri.com.ar/go/chasquid/internal/protoio"
+	"blitiri.com.ar/go/chasquid/internal/safeio"
 	"blitiri.com.ar/go/chasquid/internal/set"
 	"blitiri.com.ar/go/chasquid/internal/trace"
 	"blitiri.com.ar/go/log"
@@ -306,7 +307,10 @@ func (item *Item) WriteTo(dir string) error {
 
 	path := fmt.Sprintf("%s/%s%s", dir, itemFilePrefix, item.ID)
 
-	return protoio.WriteTextMessage(path, &item.Message, 0600)
+	// Write the file in text format for ease of debugging, and use fsync to
+	// improve durability.
+	return protoio.WriteTextMessage(
+		path, &item.Message, 0600, safeio.FsyncFileOp)
 }
 
 // SendLoop repeatedly attempts to send the item.
diff --git a/internal/safeio/safeio.go b/internal/safeio/safeio.go
index 31ad2a4..8115ade 100644
--- a/internal/safeio/safeio.go
+++ b/internal/safeio/safeio.go
@@ -95,3 +95,21 @@ func getOwner(fname string) (uid, gid int) {
 
 	return
 }
+
+// FsyncFileOp returns a FileOp that fsyncs the given file.
+// It would be even better if we would do this on the open file, but
+// unfortunately we don't have the API for it just yet. This should be good
+// enough for most cases on modern filesystems anyway.
+func FsyncFileOp(fname string) error {
+	f, err := os.OpenFile(fname, os.O_RDWR, 0)
+	if err != nil {
+		return err
+	}
+
+	if err = f.Sync(); err != nil {
+		f.Close()
+		return err
+	}
+
+	return f.Close()
+}
diff --git a/internal/safeio/safeio_test.go b/internal/safeio/safeio_test.go
index 021619e..574a7be 100644
--- a/internal/safeio/safeio_test.go
+++ b/internal/safeio/safeio_test.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"errors"
 	"fmt"
+	"io/fs"
 	"os"
 	"strings"
 	"testing"
@@ -244,3 +245,30 @@ func checkNotExists(t *testing.T, fname string) {
 		t.Fatalf("file %v exists", fname)
 	}
 }
+
+func TestFsyncFileOp(t *testing.T) {
+	// Check it as an operation on a normal file.
+	dir := testlib.MustTempDir(t)
+	defer testlib.RemoveIfOk(t, dir)
+
+	content := []byte("content 1")
+	err := testWriteFile("file1", content, 0660, FsyncFileOp)
+	if err != nil {
+		t.Error(err)
+	}
+
+	// Use a file that doesn't exist to trigger an Open error.
+	err = FsyncFileOp("doesnotexist")
+	if !errors.Is(err, fs.ErrNotExist) {
+		t.Errorf("expected an fs.ErrNotExist, got %#v", err)
+	}
+
+	// To trigger a Sync error, we use /dev/null, which is writable but cannot
+	// be synced. Confirm the error is a fs.PathError and comes from the sync
+	// operation, to make sure we're not accidentally failing on Open or
+	// Close.
+	err = FsyncFileOp("/dev/null")
+	if pe, ok := err.(*fs.PathError); !ok || pe.Op != "sync" {
+		t.Errorf("expected a fs.PathError from sync, got %#v", err)
+	}
+}