git » chasquid » commit e32ba1e

courier: Make procmail's sanitize be more unicode friendly

author Alberto Bertogli
2016-07-16 11:05:11 UTC
committer Alberto Bertogli
2016-07-16 11:33:34 UTC
parent 724d915e9586aad746f67341c1f34f05c7c418d9

courier: Make procmail's sanitize be more unicode friendly

Sanitize only lets some ascii characters go through, which is very unfriendly
to non-ascii usernames.

This patch changes it to be more inclusive, and filter out only selected
characters that may be problematic for the subprocesses, specially considering
UNIX shell environments. It's not meant to catch all possible problems, just
help prevent some common ones.

internal/courier/procmail.go +10 -4
internal/courier/procmail_test.go +29 -0

diff --git a/internal/courier/procmail.go b/internal/courier/procmail.go
index bc21cb9..06c3a52 100644
--- a/internal/courier/procmail.go
+++ b/internal/courier/procmail.go
@@ -6,6 +6,7 @@ import (
 	"os/exec"
 	"strings"
 	"time"
+	"unicode"
 
 	"blitiri.com.ar/go/chasquid/internal/trace"
 )
@@ -79,14 +80,19 @@ func (p *Procmail) Deliver(from string, to string, data []byte) error {
 	return nil
 }
 
-// sanitizeForProcmail cleans the string, leaving only [a-zA-Z-.].
+// sanitizeForProcmail cleans the string, removing characters that could be
+// problematic considering we will run an external command.
+//
+// The server does not rely on this to do substitution or proper filtering,
+// that's done at a different layer; this is just for defense in depth.
 func sanitizeForProcmail(s string) string {
 	valid := func(r rune) rune {
 		switch {
-		case r >= 'A' && r <= 'Z', r >= 'a' && r <= 'z', r == '-', r == '.':
-			return r
-		default:
+		case unicode.IsSpace(r), unicode.IsControl(r),
+			strings.ContainsRune("/;\"'\\|*&$%()[]{}`!", r):
 			return rune(-1)
+		default:
+			return r
 		}
 	}
 	return strings.Map(valid, s)
diff --git a/internal/courier/procmail_test.go b/internal/courier/procmail_test.go
index 7e2d35c..f700a18 100644
--- a/internal/courier/procmail_test.go
+++ b/internal/courier/procmail_test.go
@@ -63,3 +63,32 @@ func TestProcmailBadCommandLine(t *testing.T) {
 		t.Errorf("Unexpected success: %q %v", procmailBin, procmailArgs)
 	}
 }
+
+func TestSanitize(t *testing.T) {
+	cases := []struct{ v, expected string }{
+		// These are the same.
+		{"thisisfine", "thisisfine"},
+		{"ñaca", "ñaca"},
+		{"123-456_789", "123-456_789"},
+		{"123+456~789", "123+456~789"},
+
+		// These have problematic characters that get dropped.
+		{"with spaces", "withspaces"},
+		{"with/slash", "withslash"},
+		{"quote';andsemicolon", "quoteandsemicolon"},
+		{"a;b", "ab"},
+		{`"test"`, "test"},
+
+		// Interesting cases taken from
+		// http://www.user.uni-hannover.de/nhtcapri/bidirectional-text.html
+		// We allow them, they're the same on both sides.
+		{"١٩٩٩–١٢–٣١", "١٩٩٩–١٢–٣١"},
+		{"موزه‌ها", "موزه\u200cها"},
+	}
+	for _, c := range cases {
+		out := sanitizeForProcmail(c.v)
+		if out != c.expected {
+			t.Errorf("%q: expected %q, got %q", c.v, c.expected, out)
+		}
+	}
+}